def __init__(self, cfg):
    """A recurrent GAN model: at each time step the previously generated
    image (x'_{t-1}) and the current question q_{t} are fed to the RNN to
    produce the conditioning vector for the GAN. The following equations
    describe this model:

    - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1})
    - x^{~}_{t} = G(z | c_{t})

    Args:
        cfg: configuration object providing model dimensions, optimizer
            names/hyper-parameters, loss selection and logging paths
            (attributes read below: input_dim, hidden_dim, use_fg,
            balanced_seg, criterion, log_path, exp_name, ...).
    """
    super(RecurrentGAN_Mingyang, self).__init__()

    # region Models-Instantiation
    ###############################Original DataParallel###################
    # All sub-modules are wrapped in DataParallel and moved to GPU.
    self.generator = DataParallel(
        GeneratorFactory.create_instance(cfg)).cuda()

    self.discriminator = DataParallel(
        DiscriminatorFactory.create_instance(cfg)).cuda()

    # GRU runs with batch_first=False, i.e. input is (seq, batch, feat);
    # dim=1 makes DataParallel scatter/gather along the batch axis.
    self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim,
                                      cfg.hidden_dim,
                                      batch_first=False), dim=1).cuda()
    # self.rnn = DistributedDataParallel(nn.GRU(cfg.input_dim,
    #                                           cfg.hidden_dim,
    #                                           batch_first=False), dim=1).cuda()

    self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim)).cuda()

    self.image_encoder = DataParallel(ImageEncoder(cfg)).cuda()
    self.condition_encoder = DataParallel(ConditionEncoder(cfg)).cuda()
    self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg)).cuda()
    #######################################################################
    # Single-GPU (non-DataParallel) variants kept for reference:
    # self.generator = GeneratorFactory.create_instance(cfg).cuda()
    # self.discriminator = DiscriminatorFactory.create_instance(cfg).cuda()
    # self.rnn = nn.GRU(cfg.input_dim,cfg.hidden_dim,batch_first=False).cuda()
    # # self.rnn = DistributedDataParallel(nn.GRU(cfg.input_dim,
    # #                                           cfg.hidden_dim,
    # #                                           batch_first=False), dim=1).cuda()
    # self.layer_norm = nn.LayerNorm(cfg.hidden_dim).cuda()
    # self.image_encoder = =ImageEncoder(cfg).cuda()
    # self.condition_encoder = ConditionEncoder(cfg).cuda()
    # self.sentence_encoder = SentenceEncoder(cfg).cuda()
    # endregion

    # region Optimizers
    # OPTIM maps an optimizer name from the config to a factory callable.
    self.generator_optimizer = OPTIM[cfg.generator_optimizer](
        self.generator.parameters(), cfg.generator_lr, cfg.generator_beta1,
        cfg.generator_beta2, cfg.generator_weight_decay)

    self.discriminator_optimizer = OPTIM[cfg.discriminator_optimizer](
        self.discriminator.parameters(), cfg.discriminator_lr,
        cfg.discriminator_beta1, cfg.discriminator_beta2,
        cfg.discriminator_weight_decay)

    self.rnn_optimizer = OPTIM[cfg.rnn_optimizer](self.rnn.parameters(),
                                                  cfg.rnn_lr)

    self.sentence_encoder_optimizer = OPTIM[cfg.gru_optimizer](
        self.sentence_encoder.parameters(), cfg.gru_lr)

    # The condition encoder is always trained; the image encoder joins the
    # same optimizer only when cfg.use_fg is set.
    self.use_image_encoder = cfg.use_fg
    feature_encoding_params = list(self.condition_encoder.parameters())
    if self.use_image_encoder:
        feature_encoding_params += list(self.image_encoder.parameters())

    self.feature_encoders_optimizer = OPTIM['adam'](
        feature_encoding_params, cfg.feature_encoder_lr)
    # endregion

    # region Criterion
    self.criterion = LOSSES[cfg.criterion]()
    self.aux_criterion = DataParallel(torch.nn.BCELoss()).cuda()
    # Added by Mingyang for segmentation loss
    if cfg.balanced_seg:
        # Empirical class frequencies for 22 segmentation labels
        # (presumably measured on the training set — TODO confirm).
        label_weights = np.array([
            3.02674201e-01, 1.91545454e-03, 2.90009221e-04, 7.50949673e-04,
            1.08670452e-03, 1.11353785e-01, 4.00971053e-04, 1.06240113e-02,
            1.59590824e-01, 5.38960105e-02, 3.36431602e-02, 3.99029734e-02,
            1.88888847e-02, 2.06441476e-03, 6.33775290e-02, 5.81920411e-03,
            3.79528817e-03, 7.87975754e-02, 2.73547355e-03, 1.08308135e-01,
            0.00000000e+00, 8.44408475e-05
        ])
        # reverse the loss: rare classes get large weights.  Index 20 has
        # zero frequency, so its infinite weight is zeroed out below.
        label_weights = 1 / label_weights
        label_weights[20] = 0
        # Normalize by the smallest weight among the first 20 classes.
        label_weights = label_weights / np.min(label_weights[:20])
        # convert numpy to tensor
        label_weights = torch.from_numpy(label_weights)
        label_weights = label_weights.type(torch.FloatTensor)
        self.seg_criterion = DataParallel(
            torch.nn.CrossEntropyLoss(weight=label_weights)).cuda()
    else:
        self.seg_criterion = DataParallel(
            torch.nn.CrossEntropyLoss()).cuda()
    # endregion

    self.cfg = cfg
    self.logger = Logger(cfg.log_path, cfg.exp_name)

    # define unorm: inverts a Normalize(mean=0.5, std=0.5) transform,
    # mapping images from [-1, 1] back to [0, 1] for visualization.
    self.unorm = UnNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
# NOTE(review): script fragment — part_parameters, partcls_parameters, LR, WD,
# raw_optimizer, concat_optimizer, net, trainloader and _print are defined
# earlier, outside this excerpt; the loop body also appears to continue beyond
# it (forward/backward steps are not visible here).
part_optimizer = torch.optim.SGD(part_parameters,
                                 lr=LR,
                                 momentum=0.9,
                                 weight_decay=WD)
partcls_optimizer = torch.optim.SGD(partcls_parameters,
                                    lr=LR,
                                    momentum=0.9,
                                    weight_decay=WD)

# One MultiStepLR per optimizer; all decay the LR 10x at epochs 60 and 100.
schedulers = [
    MultiStepLR(raw_optimizer, milestones=[60, 100], gamma=0.1),
    MultiStepLR(concat_optimizer, milestones=[60, 100], gamma=0.1),
    MultiStepLR(part_optimizer, milestones=[60, 100], gamma=0.1),
    MultiStepLR(partcls_optimizer, milestones=[60, 100], gamma=0.1)
]
net = net.cuda()
net = DataParallel(net)

for epoch in range(start_epoch, 500):
    # Schedulers are stepped at the top of each epoch, before the
    # optimizers run — the pre-PyTorch-1.1 convention.
    # NOTE(review): on PyTorch >= 1.1 this skips the initial LR; confirm
    # the intended framework version.
    for scheduler in schedulers:
        scheduler.step()

    # begin training
    _print('--' * 50)
    net.train()
    for i, data in enumerate(trainloader):
        img, label = data[0].cuda(), data[1].cuda()
        batch_size = img.size(0)
        # Clear gradients on all four optimizers before the (out-of-view)
        # forward/backward pass.
        raw_optimizer.zero_grad()
        part_optimizer.zero_grad()
        concat_optimizer.zero_grad()
        partcls_optimizer.zero_grad()
def main():
    """Command-line entry point: train a GPT-2 LM on pre-tokenized text.

    Pipeline: parse CLI args -> (optionally) tokenize the raw corpus into
    ``num_pieces`` piece files -> build or load a GPT2LMHeadModel -> train
    with optional fp16 (apex) and multi-GPU (DataParallel) -> save a
    checkpoint per epoch plus a final model, optionally uploading the final
    model to Google Drive (Colab workflow).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', default='0,1,2,3', type=str, required=False, help='设置使用哪些显卡')
    parser.add_argument('--model_config', default='config/model_config_small.json', type=str, required=False, help='选择模型参数')
    parser.add_argument('--tokenizer_path', default='cache/vocab_processed.txt', type=str, required=False, help='选择词库')
    parser.add_argument('--raw_data_path', default='data/train.json', type=str, required=False, help='原始训练语料')
    parser.add_argument('--tokenized_data_path', default='data/tokenized/', type=str, required=False, help='tokenized语料存放位置')
    parser.add_argument('--raw', action='store_true', help='是否先做tokenize')
    parser.add_argument('--epochs', default=50, type=int, required=False, help='训练循环')
    parser.add_argument('--batch_size', default=1, type=int, required=False, help='训练batch size')
    parser.add_argument('--lr', default=1.5e-4, type=float, required=False, help='学习率')
    parser.add_argument('--warmup_steps', default=2000, type=int, required=False, help='warm up步数')
    parser.add_argument('--log_step', default=1, type=int, required=False, help='多少步汇报一次loss')
    parser.add_argument('--stride', default=768, type=int, required=False, help='训练时取训练数据的窗口步长')
    parser.add_argument('--gradient_accumulation', default=1, type=int, required=False, help='梯度积累')
    parser.add_argument('--fp16', action='store_true', help='混合精度')
    parser.add_argument('--fp16_opt_level', default='O1', type=str, required=False)
    parser.add_argument('--max_grad_norm', default=1.0, type=float, required=False)
    parser.add_argument('--num_pieces', default=100, type=int, required=False, help='将训练语料分成多少份')
    parser.add_argument('--output_dir', default='model/', type=str, required=False, help='模型输出路径')
    parser.add_argument('--pretrained_model', default='', type=str, required=False, help='模型训练起点路径')
    parser.add_argument('--segment', action='store_true', help='中文以词为单位')
    parser.add_argument('--google_driver_save', action='store_true', help='是否保存模型到谷歌云盘')
    args = parser.parse_args()
    print('args:\n' + args.__repr__())

    # Word-level vs character-level tokenization, chosen at import time.
    if args.segment:
        from tokenizations import tokenization_bert_word_level as tokenization_bert
    else:
        from tokenizations import tokenization_bert

    os.environ["CUDA_VISIBLE_DEVICES"] = args.device  # restrict which GPUs this process may use

    model_config = transformers.modeling_gpt2.GPT2Config.from_json_file(
        args.model_config)
    print('config:\n' + model_config.to_json_string())

    n_ctx = model_config.n_ctx
    full_tokenizer = tokenization_bert.BertTokenizer(
        vocab_file=args.tokenizer_path)
    #full_tokenizer.max_len = 999999
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('using device:', device)

    raw_data_path = args.raw_data_path
    tokenized_data_path = args.tokenized_data_path
    raw = args.raw  # whether to (re)build the tokenized dataset from the raw corpus
    epochs = args.epochs
    batch_size = args.batch_size
    lr = args.lr
    warmup_steps = args.warmup_steps
    log_step = args.log_step
    stride = args.stride
    gradient_accumulation = args.gradient_accumulation
    fp16 = args.fp16  # do not enable on GPUs without half-precision support
    fp16_opt_level = args.fp16_opt_level
    max_grad_norm = args.max_grad_norm
    num_pieces = args.num_pieces
    output_dir = args.output_dir

    if raw:
        print('building files')
        build_files(raw_data_path=raw_data_path,
                    tokenized_data_path=tokenized_data_path,
                    full_tokenizer=full_tokenizer,
                    num_pieces=num_pieces)
        print('files built')

    # Start from scratch (config only) or from a pretrained checkpoint.
    if not args.pretrained_model:
        model = transformers.modeling_gpt2.GPT2LMHeadModel(config=model_config)
    else:
        model = transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(
            args.pretrained_model)
    model.train()
    model.to(device)
    multi_gpu = False

    # Total token count across all pieces -> total optimizer steps, used to
    # size the linear warmup schedule.
    full_len = 0
    print('calculating total steps')
    for i in tqdm(range(num_pieces)):
        with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                  'r') as f:
            full_len += len([int(item) for item in f.read().strip().split()])
    total_steps = int(full_len / stride * epochs / batch_size / gradient_accumulation)
    print('total steps = {}'.format(total_steps))

    optimizer = transformers.AdamW(model.parameters(), lr=lr, correct_bias=True)
    scheduler = transformers.WarmupLinearSchedule(optimizer,
                                                  warmup_steps=warmup_steps,
                                                  t_total=total_steps)
    if fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model, optimizer, opt_level=fp16_opt_level)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = DataParallel(model)
        multi_gpu = True
    print('starting training')

    running_loss = 0
    for epoch in range(epochs):
        print('epoch {}'.format(epoch + 1))
        now = datetime.now()
        print('time: {}'.format(now))

        # Visit the piece files in a fresh random order each epoch.
        x = np.linspace(0, num_pieces - 1, num_pieces, dtype=np.int32)
        random.shuffle(x)
        piece_num = 0
        for i in x:
            with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                      'r') as f:
                line = f.read().strip()
            tokens = line.split()
            tokens = [int(token) for token in tokens]

            # Slice the token stream into n_ctx-sized windows with the given
            # stride; the final partial window is taken from the tail so no
            # tokens are dropped.
            start_point = 0
            samples = []
            while start_point < len(tokens) - n_ctx:
                samples.append(tokens[start_point:start_point + n_ctx])
                start_point += stride
            if start_point < len(tokens):
                samples.append(tokens[len(tokens) - n_ctx:])
            random.shuffle(samples)

            for step in range(len(samples) // batch_size):
                # prepare data
                batch = samples[step * batch_size:(step + 1) * batch_size]
                batch_labels = []
                batch_inputs = []
                for ids in batch:
                    # For causal LM training inputs and labels are the same
                    # sequence; the model shifts labels internally.
                    int_ids_for_labels = [int(x) for x in ids]
                    int_ids_for_inputs = [int(x) for x in ids]
                    batch_labels.append(int_ids_for_labels)
                    batch_inputs.append(int_ids_for_inputs)
                batch_labels = torch.tensor(batch_labels).long().to(device)
                batch_inputs = torch.tensor(batch_inputs).long().to(device)

                # forward pass
                outputs = model.forward(input_ids=batch_inputs,
                                        labels=batch_labels)
                loss, logits = outputs[:2]

                # get loss
                if multi_gpu:
                    # DataParallel returns one loss per GPU; average them.
                    loss = loss.mean()
                if gradient_accumulation > 1:
                    loss = loss / gradient_accumulation

                # loss backward
                if fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                        torch.nn.utils.clip_grad_norm_(
                            amp.master_params(optimizer), max_grad_norm)
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   max_grad_norm)

                # optimizer step — only every gradient_accumulation batches.
                if (step + 1) % gradient_accumulation == 0:
                    running_loss += loss.item()
                    optimizer.step()
                    optimizer.zero_grad()
                    scheduler.step()
                if (step + 1) % log_step == 0:
                    print(
                        'now time: {}:{}. Step {} of piece {} of epoch {}, loss {}'
                        .format(datetime.now().hour,
                                datetime.now().minute,
                                (step + 1) // gradient_accumulation,
                                piece_num, epoch + 1,
                                running_loss / log_step))
                    running_loss = 0
            piece_num += 1

        print('saving model for epoch {}'.format(epoch + 1))
        if not os.path.exists(output_dir + 'model_epoch{}'.format(epoch + 1)):
            os.mkdir(output_dir + 'model_epoch{}'.format(epoch + 1))
        # Unwrap DataParallel before saving so the checkpoint has clean keys.
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(output_dir + 'model_epoch{}'.format(epoch + 1))
        # torch.save(scheduler.state_dict(), output_dir + 'model_epoch{}/scheduler.pt'.format(epoch + 1))
        # torch.save(optimizer.state_dict(), output_dir + 'model_epoch{}/optimizer.pt'.format(epoch + 1))
        print('epoch {} finished'.format(epoch + 1))

        then = datetime.now()
        print('time: {}'.format(then))
        print('time for one epoch: {}'.format(then - now))

    print('training finished')
    if not os.path.exists(output_dir + 'final_model'):
        os.mkdir(output_dir + 'final_model')
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir + 'final_model')
    # torch.save(scheduler.state_dict(), output_dir + 'final_model/scheduler.pt')
    # torch.save(optimizer.state_dict(), output_dir + 'final_model/optimizer.pt')

    if args.google_driver_save:
        # Import PyDrive and associated libraries.
        # This only needs to be done once in a notebook.
        from pydrive.auth import GoogleAuth
        from pydrive.drive import GoogleDrive
        from google.colab import auth
        from oauth2client.client import GoogleCredentials

        # Authenticate and create the PyDrive client.
        # This only needs to be done once in a notebook.
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        drive = GoogleDrive(gauth)

        # Create & upload a text file.
        uploaded = drive.CreateFile({"title": "config.json"})
        # Read file and set it as a content of this instance.
        uploaded.SetContentFile(
            '/content/GPT-2-Train/model/final_model/config.json')
        uploaded.Upload()  # Upload the file.
        print('Uploaded file with ID {}'.format(uploaded.get('id')))

        uploaded = drive.CreateFile({"title": "pytorch_model.bin"})
        # Read file and set it as a content of this instance.
        uploaded.SetContentFile(
            '/content/GPT-2-Train/model/final_model/pytorch_model.bin')
        uploaded.Upload()  # Upload the file.
        print('Uploaded file with ID {}'.format(uploaded.get('id')))
def prepare(config):
    """Build model, loss, trainer and data loaders from *config*.

    Returns:
        Tuple of (config, model, loss, warp, trainer, train_data, val_data,
        train_loader, val_loader).

    Side effects: configures the environment via env_utils, may load weights
    from disk, and in JIT mode traces the model and exits the process.
    """
    # Hard-mining uses an alternative training dataset implementation;
    # validation always uses the plain dataset.
    if 'hardmining' in config.prepare and config.prepare['hardmining']:
        from datasets import myDataset2 as myDataset
    else:
        from datasets import myDataset
    from datasets import myDataset as myDataset_val
    env_utils.setEnvironment(config)

    # abn selects the batch-norm variant for debug/JIT modes — presumably
    # in-place ABN on/off; confirm against ModelLoader.
    if config.debug:
        model = ModelLoader.load(config.net["model"], config=config, abn=1)
    elif config.jit:
        model = ModelLoader.load(config.net["model"], config=config, abn=0)
    else:
        model = ModelLoader.load(config.net["model"], config=config)
    loss = LossLoader.load(config.net["rpnloss"], config)

    # Extra evaluation-metric losses listed under config.net['em'].
    em_names = config.net['em']
    em_list = []
    for ems in em_names:
        em_list.append(LossLoader.load(ems, config))

    netio = netIOer(config)
    if config.half:
        # Half-precision model, but keep BatchNorm layers in float32.
        model = model.half()
        model = BN_convert_float(model)
    if config.net["load_weight"] != '':
        model, config = netio.load_file(model, config.net["load_weight"])
    # optimizer = optim.SGD(model.parameters(), lr= config.train['lr_arg'], momentum=0.9,
    #                       weight_decay=config.train['weight_decay'])
    model = model.cuda()
    if config.jit:
        # Trace-and-exit path: no training happens in JIT mode.
        netio.trace(model)
        sys.exit()
    loss = loss.cuda()

    # warpLoss bundles model + loss so DataParallel computes the loss on
    # each GPU; debug mode skips the wrapper.
    warp = warpLoss(model, loss, config.prepare['margin'])
    if not config.debug:
        warp = DataParallel(warp)
    trainer = Trainer(warp, config, netio, emlist=em_list)

    train_data = myDataset(config, 'train')
    if config.valtrain:
        val_data = myDataset_val(config, 'valtrain')
    else:
        val_data = myDataset_val(config, 'val')
    print(config.augtype)
    print(config.env['cpu_num'])
    train_loader = DataLoader(train_data,
                              batch_size=config.train['batch_size'],
                              shuffle=True,
                              num_workers=config.env['cpu_num'],
                              drop_last=True,
                              pin_memory=True,
                              worker_init_fn=np.random.seed)
    # Validation runs one sample at a time; identity collate keeps raw items.
    val_loader = DataLoader(val_data,
                            batch_size=1,
                            shuffle=False,
                            num_workers=5,
                            pin_memory=True,
                            collate_fn=lambda x: x)
    return config, model, loss, warp, trainer, train_data, val_data, train_loader, val_loader
def main(args):
    """Adversarially train an RNN poem decoder against a discriminator.

    Loads poem/image feature pickles, builds a DataLoader over either the
    unimodal or multimodal corpus, then alternates: (1) discriminator step
    on real vs. sampled sequences, (2) generator step combining a REINFORCE
    reward from the discriminator with a cross-entropy LM loss.  Checkpoints
    both networks every ``args.save_step`` steps and at the end.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    with open('data/multim_poem.json') as f, open('data/unim_poem.json') as unif:
        multim = json.load(f)
        unim = json.load(unif)
    multim = util.filter_multim(multim)
    # multim = multim[:128]

    with open('data/img_features.pkl', 'rb') as fi, open('data/poem_features.pkl', 'rb') as fp:
        img_features = pickle.load(fi)
        poem_features = pickle.load(fp)

    # make sure vocab exists
    word2idx, idx2word = util.read_vocab_pickle(args.vocab_path)
    # will be used in embedder

    # Pick the corpus + conditioning features: poem features for the
    # unimodal set, image features for the multimodal set.
    if args.source == 'unim':
        data = unim
        features = poem_features
    elif args.source == 'multim':
        data = multim
        features = img_features
    else:
        print('Error: source must be unim or multim!')
        exit()

    # create data loader. the data will be in decreasing order of length
    data_loader = get_poem_poem_dataset(args.batch_size,
                                        shuffle=True,
                                        num_workers=args.num_workers,
                                        json_obj=data,
                                        features=features,
                                        max_seq_len=128,
                                        word2idx=word2idx,
                                        tokenizer=None)

    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(word2idx), device)
    decoder = DataParallel(decoder)
    if args.restore:
        decoder.load_state_dict(torch.load(args.ckpt))
    if args.load:
        decoder.load_state_dict(torch.load(args.load))
    decoder.to(device)

    discriminator = Discriminator(args.embed_size, args.hidden_size,
                                  len(word2idx), num_labels=2)
    # Tie the discriminator's embedding to the decoder's.
    discriminator.embed.weight = decoder.module.embed.weight
    discriminator = DataParallel(discriminator)
    if args.restore:
        discriminator.load_state_dict(torch.load(args.disc))
    discriminator.to(device)

    # optimization config
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 10], gamma=0.33)
    optimizerD = torch.optim.Adam(discriminator.parameters(), lr=args.learning_rate)

    sys.stderr.write('Start training...\n')
    total_step = len(data_loader)
    decoder.train()
    global_step = 0
    running_ls = 0
    for epoch in range(args.num_epochs):
        # Stepped at the top of each epoch (pre-PyTorch-1.1 convention).
        scheduler.step()
        acc_ls = 0
        start = time.time()
        for i, (batch) in enumerate(data_loader):
            poem_embed, ids, lengths = [t.to(device) for t in batch]
            # LM targets are the sequence shifted by one (drop BOS column).
            targets = pack_padded_sequence(ids[:, 1:], lengths, batch_first=True)[0]

            # train discriminator
            # train with real
            discriminator.zero_grad()
            pred_real = discriminator(ids[:, 1:], lengths)
            real_label = torch.ones(ids.size(0), dtype=torch.long).to(device)
            loss_d_real = criterion(pred_real, real_label)
            loss_d_real.backward(torch.ones_like(loss_d_real), retain_graph=True)

            # train with fake: sample token ids from the decoder's softmax.
            logits = decoder(poem_embed, ids, lengths)
            weights = F.softmax(logits, dim=-1)
            m = Categorical(probs=weights)
            generated_ids = m.sample()
            # generated_ids = torch.argmax(logits, dim=-1)
            pred_fake = discriminator(generated_ids.detach(), lengths)
            fake_label = torch.zeros(ids.size(0)).long().to(device)
            loss_d_fake = criterion(pred_fake, fake_label)
            loss_d_fake.backward(torch.ones_like(loss_d_fake), retain_graph=True)
            loss_d = loss_d_real.mean().item() + loss_d_fake.mean().item()
            optimizerD.step()

            # train generator: REINFORCE with the discriminator's
            # "real" probability as the reward...
            decoder.zero_grad()
            reward = F.softmax(pred_fake, dim=-1)[:, 1].unsqueeze(-1)
            loss_r = -m.log_prob(generated_ids) * reward
            loss_r.backward(torch.ones_like(loss_r), retain_graph=True)
            loss_r = loss_r.mean().item()

            # ...plus a standard teacher-forced cross-entropy LM loss.
            loss = criterion(pack_padded_sequence(logits, lengths,
                                                  batch_first=True)[0], targets)
            loss.backward(torch.ones_like(loss))
            loss = loss.mean().item()
            # loss = loss_r
            running_ls += loss
            acc_ls += loss
            # NOTE(review): clips each parameter tensor's norm separately,
            # not the global norm over all parameters — confirm intended.
            for param in decoder.parameters():
                torch.nn.utils.clip_grad_norm_(param, 0.25)
            optimizer.step()

            global_step += 1
            if global_step % args.log_step == 0:
                elapsed_time = time.time() - start
                iters_per_sec = (i + 1) / elapsed_time
                remaining = (total_step - i - 1) / iters_per_sec
                remaining_fmt = time.strftime("%H:%M:%S", time.gmtime(remaining))
                elapsed_fmt = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))

                print('[{}/{}, {}/{}], ls_d:{:.2f}, ls_r:{:.2f} ls: {:.2f}, Acc: {:.2f} Perp: {:5.2f} {:.3}it/s {}<{}'
                      .format(epoch+1, args.num_epochs, i+1, total_step, loss_d,
                              loss_r, running_ls / args.log_step, acc_ls / (i+1),
                              np.exp(acc_ls / (i+1)), iters_per_sec,
                              elapsed_fmt, remaining_fmt))
                running_ls = 0
            if global_step % args.save_step == 0:
                torch.save(decoder.state_dict(), args.ckpt)
                torch.save(discriminator.state_dict(), args.disc)

    # Final checkpoint after all epochs.
    torch.save(decoder.state_dict(), args.save)
    torch.save(discriminator.state_dict(), args.disc)
def setup(self, model):
    """Move *model* onto the root device, then wrap it for data parallelism."""
    # DataParallel expects its module to already live on the source device.
    model.to(self.root_device)
    wrapped = LightningParallelModule(model)
    self._model = DataParallel(wrapped, self.parallel_devices)
def train_net(num_workers: int = 0):
    """Train (or, with ``args.test_model``, evaluate) the keypoint-voting
    network on the LineMOD dataset.

    Args:
        num_workers (int, optional): Number of CPU workers used by the
            DataLoader.  Roughly 2 CPUs per GPU is sufficient.  Defaults
            to 0.
    """
    net = ResNet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWarpper(net)
    net = DataParallel(net).cuda()

    optimizer = optim.Adam(net.parameters(), lr=train_cfg["lr"])
    model_dir = os.path.join(cfg.MODEL_DIR, train_cfg["model_name"])
    motion_model = train_cfg["motion_model"]
    print("motion state {}".format(motion_model))

    if args.test_model:
        # Evaluation path: fixed seed, load a trained checkpoint.
        torch.manual_seed(0)
        begin_epoch = load_model(net.module.net, optimizer, model_dir,
                                 args.load_epoch)

        if args.normal:
            print("testing normal linemod ...")
            img_db = PVNetLineModImageDB(args.linemod_cls,
                                         has_render_set=False,
                                         has_fuse_set=False)
            test_db = img_db.test_real_set + img_db.val_real_set
            test_set = PVNetLineModDatasetRealAug(
                test_db,
                cfg.PVNET_LINEMOD_DIR,
                vote_type,
                augment=False,
                use_motion=motion_model,
            )
            test_sampler = SequentialSampler(test_set)
            test_batch_sampler = ImageSizeBatchSampler(
                test_sampler, train_cfg["test_batch_size"], False)
            test_loader = DataLoader(test_set,
                                     batch_sampler=test_batch_sampler,
                                     num_workers=num_workers)
            prefix = "test" if args.use_test_set else "val"
            # val(net, test_loader, begin_epoch, prefix, use_motion=motion_model)
    else:
        # Training path: optionally resume from the latest checkpoint.
        begin_epoch = 0
        if train_cfg["resume"]:
            begin_epoch = load_model(net.module.net, optimizer, model_dir)

        image_db = PVNetLineModImageDB(args.linemod_cls,
                                       has_fuse_set=train_cfg["use_fuse"],
                                       has_render_set=True)

        # Training pool: rendered images, plus (optionally) real and fused
        # sets depending on the config flags.
        train_db = []
        train_db += image_db.render_set
        if train_cfg["use_real_train"]:
            train_db += image_db.train_real_set
        if train_cfg["use_fuse"]:
            train_db += image_db.fuse_set

        train_set = PVNetLineModDatasetRealAug(
            train_db,
            cfg.PVNET_LINEMOD_DIR,
            vote_type,
            augment=True,
            cfg=train_cfg["aug_cfg"],
            use_motion=motion_model,
        )
        train_sampler = RandomSampler(train_set)
        train_batch_sampler = ImageSizeBatchSampler(
            train_sampler,
            train_cfg["train_batch_size"],
            False,
            cfg=train_cfg["aug_cfg"],
        )
        train_loader = DataLoader(train_set,
                                  batch_sampler=train_batch_sampler,
                                  num_workers=num_workers)

        val_db = image_db.val_real_set
        val_set = PVNetLineModDatasetRealAug(
            val_db,
            cfg.PVNET_LINEMOD_DIR,
            vote_type,
            augment=False,
            cfg=train_cfg["aug_cfg"],
            use_motion=motion_model,
        )
        val_sampler = SequentialSampler(val_set)
        val_batch_sampler = ImageSizeBatchSampler(val_sampler,
                                                  train_cfg["test_batch_size"],
                                                  False,
                                                  cfg=train_cfg["aug_cfg"])
        val_loader = DataLoader(val_set,
                                batch_sampler=val_batch_sampler,
                                num_workers=num_workers)

        """
        if args.linemod_cls in cfg.occ_linemod_cls_names:
            occ_image_db = OcclusionLineModImageDB(args.linemod_cls)
            occ_val_db = occ_image_db.test_real_set[
                : len(occ_image_db.test_real_set) // 2
            ]
            occ_val_set = PVNetLineModDatasetRealAug(
                occ_val_db,
                cfg.OCCLUSION_LINEMOD,
                vote_type,
                augment=False,
                cfg=train_cfg["aug_cfg"],
                use_motion=motion_model,
            )
            occ_val_sampler = SequentialSampler(occ_val_set)
            occ_val_batch_sampler = ImageSizeBatchSampler(
                occ_val_sampler,
                train_cfg["test_batch_size"],
                False,
                cfg=train_cfg["aug_cfg"],
            )
            occ_val_loader = DataLoader(
                occ_val_set, batch_sampler=occ_val_batch_sampler, num_workers=num_workers
            )
        """

        # Main epoch loop: decay LR, train, and checkpoint every epoch.
        with tqdm.trange(begin_epoch, train_cfg["epoch_num"] + 1,
                         desc="epochs") as tbar:
            for epoch in tbar:
                adjust_learning_rate(
                    optimizer,
                    epoch,
                    train_cfg["lr_decay_rate"],
                    train_cfg["lr_decay_epoch"],
                )
                train(net, optimizer, train_loader, epoch)
                # val(net, val_loader, epoch, use_motion=motion_model)
                # if args.linemod_cls in cfg.occ_linemod_cls_names:
                #     val(net, occ_val_loader, epoch, "occ_val", use_motion=motion_model)
                save_model(net.module.net, optimizer, epoch, model_dir)
                tbar.refresh()
def replicate(self, module, device_ids):
    """Replicate *module* onto *device_ids* once and cache the result.

    Subsequent calls return the cached replicas without re-replicating.
    Replicas are created detached (no autograd graph) when gradients are
    globally disabled.
    """
    if self.replicas is None:
        from torch.nn.parallel.replicate import replicate
        self.replicas = replicate(module, device_ids,
                                  not torch.is_grad_enabled())
    return self.replicas


# Script section: set up a frozen (distil)GPT-2 for scoring text.
# NOTE(review): `device` is defined outside this excerpt.
dset_sbert = Dataset.load_from_disk("/home/ahemf/processed_datasets/dsets_448_sbert")

model_id = "distilgpt2"  # 'gpt2'
model = GPT2LMHeadModel.from_pretrained(model_id).eval()
max_length = 256
stride = max_length
# Freeze all parameters — the model is used for inference only.
for p in model.parameters():
    p.requires_grad = False
if torch.cuda.device_count() > 1:
    model = DataParallel(model)
model.to(device)
# Freeze again after wrapping/moving (same parameter objects; defensive).
for p in model.parameters():
    p.requires_grad = False

tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
# GPT-2 has no pad token; reuse EOS so batch padding works.
tokenizer.pad_token = tokenizer.eos_token


def perplexity(x, device):
    # Batch Size = 1 only because loss from GPT2LMHeadModel gives one cross entropy value for whole input
    # NOTE(review): the function body appears to continue beyond this excerpt.
    encoded = tokenizer.batch_encode_plus(x["text"],
                                          padding=True,
                                          max_length=max_length,
                                          return_tensors="pt")
    # Truncate to GPT-2's 1024-token context window.
    input_ids = encoded["input_ids"][:, :1024].to(device)
    attention_mask = encoded["attention_mask"][:, :1024].to(device)
    lengths = attention_mask.sum(1)
def main():
    """Train a GPT-2 LM over one concatenated token stream.

    Reads all tokenized piece files into a single token list, slices it into
    fixed ``n_ctx`` windows with stride ``stride``, then trains with optional
    fp16 (apex) and multi-GPU (DataParallel), checkpointing after each epoch
    and at the end.  Relies on module-level globals (``raw``, ``num_pieces``,
    ``epochs``, ``device``, ...) defined outside this function.

    Fixes vs. previous revision:
    - ``scheduler.step()`` is now called AFTER ``optimizer.step()``: calling
      it first advanced the warmup LR one step early and is warned against
      since PyTorch 1.1.  This also matches the other trainer in this file.
    - the model is invoked as ``model(...)`` instead of ``model.forward(...)``
      so ``nn.Module.__call__`` hooks run.
    """
    if raw:
        print('building files')
        build_files(data_path=raw_data_path)
        print('files built')

    model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(
        config=model_config)
    model.to(device)
    multi_gpu = False

    # Concatenate every piece file into one token stream.
    full_line = ''
    print('calculating total steps')
    for i in tqdm(range(num_pieces)):
        with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                  'r') as f:
            full_line += f.read()
    full_line = full_line.strip()
    full_line = [int(item) for item in full_line.split()]
    len_full_line = len(full_line)

    # Slice into n_ctx-sized windows with the configured stride; the tail
    # shorter than n_ctx is dropped.
    samples = []
    start_point = 0
    while start_point + n_ctx < len_full_line:
        samples.append(full_line[start_point:start_point + n_ctx])
        start_point += stride

    total_steps = int(
        len(samples) * epochs / batch_size / gradient_accumulation)
    print('total steps = {}'.format(total_steps))

    optimizer = pytorch_transformers.AdamW(model.parameters(),
                                           lr=lr,
                                           correct_bias=True)
    scheduler = pytorch_transformers.WarmupLinearSchedule(
        optimizer, warmup_steps=warmup_steps, t_total=total_steps)

    if fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=fp16_opt_level)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = DataParallel(model)
        multi_gpu = True
    print('starting training')

    for epoch in range(epochs):
        print('epoch {}'.format(epoch + 1))
        now = datetime.now()
        print('time: {}'.format(now))

        running_loss = 0
        random.shuffle(samples)
        for step in range(len(samples) // batch_size):
            # prepare data
            batch = samples[step * batch_size:(step + 1) * batch_size]
            batch_labels = []
            batch_inputs = []
            for ids in batch:
                # Causal LM: inputs and labels are the same sequence; the
                # model shifts labels internally.
                int_ids_for_labels = [int(x) for x in ids]
                int_ids_for_inputs = [int(x) for x in ids]
                batch_labels.append(int_ids_for_labels)
                batch_inputs.append(int_ids_for_inputs)
            batch_labels = torch.tensor(batch_labels).long().to(device)
            batch_inputs = torch.tensor(batch_inputs).long().to(device)

            # forward pass (via __call__ so module hooks run)
            outputs = model(input_ids=batch_inputs, labels=batch_labels)
            loss, logits = outputs[:2]

            # get loss
            if multi_gpu:
                # DataParallel returns one loss per GPU; average them.
                loss = loss.mean()
            if gradient_accumulation > 1:
                loss = loss / gradient_accumulation

            # loss backward
            if fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_grad_norm)

            # optimizer step — only every gradient_accumulation batches.
            if (step + 1) % gradient_accumulation == 0:
                running_loss += loss.item()
                # Correct order: update weights first, then advance the LR
                # schedule (PyTorch >= 1.1 contract).
                optimizer.step()
                optimizer.zero_grad()
                scheduler.step()
            if (step + 1) % log_step == 0:
                # NOTE(review): the gradient_accumulation**2 factor in the
                # reported loss looks dubious but is kept to preserve the
                # original logging output.
                print('step {} of epoch {}, loss {}'.format(
                    (step + 1) // gradient_accumulation, epoch + 1,
                    running_loss * gradient_accumulation**2 / log_step))
                running_loss = 0

        print('saving model for epoch {}'.format(epoch + 1))
        if not os.path.exists('./model/model_epoch{}'.format(epoch + 1)):
            os.mkdir('./model/model_epoch{}'.format(epoch + 1))
        # Unwrap DataParallel before saving so the checkpoint has clean keys.
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained('./model/model_epoch{}'.format(epoch + 1))
        torch.save(scheduler.state_dict(),
                   './model/model_epoch{}/scheduler.pt'.format(epoch + 1))
        torch.save(optimizer.state_dict(),
                   './model/model_epoch{}/optimizer.pt'.format(epoch + 1))
        print('epoch {} finished'.format(epoch + 1))

        then = datetime.now()
        print('time: {}'.format(then))
        print('time for one epoch: {}'.format(then - now))

    print('training finished')
    if not os.path.exists('./model/final_model'):
        os.mkdir('./model/final_model')
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained('./model/final_model')
    torch.save(scheduler.state_dict(), './model/final_model/scheduler.pt')
    torch.save(optimizer.state_dict(), './model/final_model/optimizer.pt')
} FROM_TRAIN_ITER = 15 Net = YOLOv1Net(CFG=YOLOv1Config) for m in Net.modules(): if isinstance(m, Linear) or isinstance(m, Conv2d): m.weight.data.normal_(0, 0.01) m.bias.data.zero_() if YOLOv1Config["GPU_NUMS"] > 0: Net = Net.cuda() if YOLOv1Config["GPU_NUMS"] > 1: Net = DataParallel(Net) train_dataset = yoloDataset(root=os.path.join(YOLOv1Config["DATA_PATH"], "VOC2012", "JPEGImages"), list_file='utils/voc2012train.txt', train=True, transform=[ToTensor()]) train_loader = DataLoader(train_dataset, batch_size=YOLOv1Config["BATCH_SIZE"], shuffle=True) criterion = YOLOv1Loss() # 优化器 optimizer = SGD(Net.parameters(), lr=YOLOv1Config["LEARNING_RATE"], momentum=0.95,
def make_network(configs):
    """Build the pose network and return a train/inference step closure.

    Instantiates the network named by ``configs['network']``, wraps it in
    DataParallel, stores a Trainer in ``configs['inference']['net']``, an
    Adam optimizer in ``configs['train']['optimizer']``, opens a log file
    under ``exp/<exp-name>/log``, and returns ``make_train(batch_id, config,
    phase, **inputs)``.

    Fix vs. previous revision: inside ``make_train`` the local ``optimizer``
    was assigned AFTER the ``batch_id == 200000`` LR-decay branch that reads
    ``optimizer.param_groups``, so reaching batch 200000 raised
    UnboundLocalError.  The assignment is now hoisted above that branch.
    """
    PoseNet = importNet(configs['network'])
    train_cfg = configs['train']
    config = configs['inference']

    poseNet = PoseNet(**config)

    forward_net = DataParallel(poseNet.cuda())

    # Loss is computed on the unwrapped module (not through DataParallel).
    def calc_loss(*args, **kwargs):
        return poseNet.calc_loss(*args, **kwargs)

    config['net'] = Trainer(forward_net, configs['inference']['keys'], calc_loss)
    train_cfg['optimizer'] = torch.optim.Adam(config['net'].parameters(),
                                              train_cfg['learning_rate'])

    # Per-experiment directory + append-mode loss log.
    exp_path = os.path.join('exp', configs['opt'].exp)
    if not os.path.exists(exp_path):
        os.mkdir(exp_path)
    logger = open(os.path.join(exp_path, 'log'), 'a+')

    def make_train(batch_id, config, phase, **inputs):
        """Run one training step (phase != 'inference') or one forward pass."""
        for i in inputs:
            inputs[i] = make_input(inputs[i])
        net = config['inference']['net']
        config['batch_id'] = batch_id
        if phase != 'inference':
            result = net(inputs['imgs'],
                         **{i: inputs[i] for i in inputs if i != 'imgs'})
            num_loss = len(config['train']['loss'])
            ## I use the last outputs as the loss
            ## the weights of the loss are controlled by config['train']['loss']
            losses = {i[0]: result[-num_loss + idx] * i[1]
                      for idx, i in enumerate(config['train']['loss'])}

            loss = 0
            toprint = '\n{}: '.format(batch_id)
            for i in losses:
                loss = loss + torch.mean(losses[i])

                my_loss = make_output(losses[i])
                my_loss = my_loss.mean(axis=0)

                if my_loss.size == 1:
                    toprint += ' {}: {}'.format(i, format(my_loss.mean(), '.8f'))
                else:
                    toprint += '\n{}'.format(i)
                    for j in my_loss:
                        toprint += ' {}'.format(format(j.mean(), '.8f'))
            logger.write(toprint)
            logger.flush()

            # Bind the optimizer BEFORE the LR-decay branch below uses it
            # (previously this line came after, causing UnboundLocalError
            # when batch_id == 200000).
            optimizer = train_cfg['optimizer']

            if batch_id == 200000:
                ## decrease the learning rate after 200000 iterations
                for param_group in optimizer.param_groups:
                    param_group['lr'] = 1e-5

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            return None
        else:
            out = {}
            net = net.eval()
            result = net(**inputs)
            if type(result) != list and type(result) != tuple:
                result = [result]
            out['preds'] = [make_output(i) for i in result]
            return out

    return make_train
def main():
    """Evaluate a pretrained GPT-2 LM on a tokenized corpus and report perplexity.

    CLI-driven: optionally tokenizes the raw corpus first (``--raw``), loads
    the model given by ``--pretrained_model``, slides a window of ``n_ctx``
    tokens (step ``--stride``) over every corpus piece, averages the LM loss
    and writes ``exp(mean loss)`` (perplexity) to ``<output_dir>/result.txt``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', default='0,1,2,3', type=str, required=False, help='设置使用哪些显卡')
    parser.add_argument('--model_config', default='config/model_config_small.json', type=str, required=False, help='选择模型参数')
    parser.add_argument('--tokenizer_path', default='cache/vocab_small.txt', type=str, required=False, help='选择词库')
    parser.add_argument('--raw_data_path', default='data/eval.json', type=str, required=False, help='原始语料')
    parser.add_argument('--tokenized_data_path', default='data/tokenized_eval/', type=str, required=False, help='tokenized语料存放位置')
    parser.add_argument('--raw', action='store_true', help='是否先做tokenize')
    parser.add_argument('--batch_size', default=8, type=int, required=False, help='batch size')
    parser.add_argument('--log_step', default=1, type=int, required=False, help='多少步汇报一次')
    parser.add_argument('--stride', default=768, type=int, required=False, help='取数据的窗口步长')
    parser.add_argument('--num_pieces', default=100, type=int, required=False, help='将训练语料分成多少份')
    parser.add_argument('--min_length', default=128, type=int, required=False, help='最短收录文章长度')
    parser.add_argument('--pretrained_model', default='', type=str, required=False, help='模型起点路径')
    parser.add_argument('--no_wordpiece', action='store_true', help='不做word piece切词')
    parser.add_argument('--output_dir', default='eval_result/', type=str, required=False, help='结果输出路径')
    args = parser.parse_args()
    print('args:\n' + args.__repr__())

    # Choose the tokenizer implementation (with or without WordPiece).
    if args.no_wordpiece:
        from tokenizations import tokenization_bert_without_wordpiece as tokenization_bert
    else:
        from tokenizations import tokenization_bert

    os.environ["CUDA_VISIBLE_DEVICES"] = args.device  # 此处设置程序使用哪些显卡

    model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(
        args.model_config)
    print('config:\n' + model_config.to_json_string())
    n_ctx = model_config.n_ctx  # context window; also the evaluation sample length
    full_tokenizer = tokenization_bert.BertTokenizer(
        vocab_file=args.tokenizer_path)
    full_tokenizer.max_len = n_ctx
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('using device:', device)

    raw_data_path = args.raw_data_path
    tokenized_data_path = args.tokenized_data_path
    raw = args.raw  # whether to (re)build the tokenized dataset from scratch
    batch_size = args.batch_size
    log_step = args.log_step
    stride = args.stride
    num_pieces = args.num_pieces
    min_length = args.min_length
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)

    if raw:
        print('building files')
        build_files(data_path=raw_data_path,
                    tokenized_data_path=tokenized_data_path,
                    num_pieces=num_pieces,
                    full_tokenizer=full_tokenizer,
                    min_length=min_length)
        print('files built')

    if not args.pretrained_model:
        print('you need to specify a trained model.')
        exit(1)
    else:
        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(
            args.pretrained_model)
    model.eval()
    model.to(device)

    num_parameters = 0
    parameters = model.parameters()
    for parameter in parameters:
        num_parameters += parameter.numel()
    print('number of parameters: {}'.format(num_parameters))

    multi_gpu = False
    full_len = 0
    print('calculating total steps')
    for i in tqdm(range(num_pieces)):
        with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i), 'r') as f:
            full_len += len([int(item) for item in f.read().strip().split()])
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = DataParallel(model)
        multi_gpu = True
    print('starting training')
    overall_step = 0
    total_loss = 0
    total_steps = 0
    # eval
    now = datetime.now()
    print('time: {}'.format(now))
    piece_num = 0
    for i in range(num_pieces):
        with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i), 'r') as f:
            line = f.read().strip()
        tokens = line.split()
        tokens = [int(token) for token in tokens]
        start_point = 0
        samples = []
        while start_point < len(tokens) - n_ctx:
            samples.append(tokens[start_point:start_point + n_ctx])
            start_point += stride
        start_point -= stride
        last = tokens[start_point + n_ctx:]
        if 0 < len(last) < n_ctx:
            # BUG FIX: the original wrapped the PAD ids in an extra list, so
            # extend() appended one nested list, and `last` was then never
            # used. Pad the tail flat to n_ctx and evaluate it like any
            # other window.
            last.extend(
                full_tokenizer.convert_tokens_to_ids(['[PAD]']) * (n_ctx - len(last)))
            samples.append(last)
        random.shuffle(samples)
        for step in range(len(samples) // batch_size):  # drop last
            # prepare data
            batch = samples[step * batch_size:(step + 1) * batch_size]
            batch_ids = [[int(x) for x in ids] for ids in batch]
            # LM evaluation: labels are the inputs themselves.
            batch_labels = torch.tensor(batch_ids).long().to(device)
            batch_inputs = torch.tensor(batch_ids).long().to(device)

            # forward pass — evaluation only, no autograd graph needed
            with torch.no_grad():
                outputs = model.forward(input_ids=batch_inputs, labels=batch_labels)
            loss, logits = outputs[:2]

            # get loss
            if multi_gpu:
                loss = loss.mean()
            # BUG FIX: accumulate the Python float; summing tensors kept
            # every batch's computation alive in memory.
            total_loss += loss.item()
            total_steps += 1
            overall_step += 1  # BUG FIX: was never incremented

            if overall_step % log_step == 0:
                print('now time: {}:{}. Step {} of piece {}, ppl {}'.format(
                    datetime.now().hour,
                    datetime.now().minute, (step + 1), piece_num,
                    torch.exp(loss)))
        piece_num += 1

    # BUG FIX: the original only wrote the result when the directory already
    # existed; and file.write() requires a str, not a numpy float.
    os.makedirs(args.output_dir, exist_ok=True)
    with open(args.output_dir + 'result.txt', 'w') as f:
        f.write(str(np.exp(total_loss / total_steps)))
def main():
    """Entry point: build network/criterion/scheduler, optionally resume from a
    checkpoint, construct train/valid loaders and launch training.

    Relies on module-level `args` (parsed CLI options) plus globals such as
    RESULT_DIR, DATA_DIR, Logger, init_network, RetrievalDataset, image_collate
    and train — TODO confirm these are defined in this module's imports.
    """
    # Only rank 0 logs in distributed mode; a single process always logs.
    args.can_print = (args.distributed and args.local_rank == 0) or (not args.distributed)
    log_out_dir = f'{RESULT_DIR}/logs/{args.out_dir}'
    os.makedirs(log_out_dir, exist_ok=True)
    if args.can_print:
        log = Logger()
        log.open(f'{log_out_dir}/log.train.txt', mode='a')
    else:
        log = None
    model_out_dir = f'{RESULT_DIR}/models/{args.out_dir}'
    if args.can_print:
        log.write(
            f'>> Creating directory if it does not exist:\n>> {model_out_dir}\n'
        )
    os.makedirs(model_out_dir, exist_ok=True)
    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
    # set random seeds (fixed seed 0 for reproducibility across runs)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)
    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model_params['can_print'] = args.can_print
    model = init_network(model_params)
    # move network to gpu; sync-BN conversion only applies to distributed runs
    if args.distributed:
        dist.init_process_group(backend='nccl', init_method='env://')
        model = convert_syncbn_model(model)
    model.cuda()
    if args.distributed:
        model = DistributedDataParallel(model, delay_allreduce=True)
    else:
        model = DataParallel(model)
    # define loss function (criterion)
    # NOTE(review): eval() on a CLI string is unsafe with untrusted input, and
    # the bare except hides the real error — a name->class dict would be safer.
    try:
        criterion = eval(args.loss)().cuda()
    except:
        raise RuntimeError(f'Loss {args.loss} not available!')
    start_epoch = 0
    best_score = 0
    best_epoch = 0
    # define scheduler
    try:
        scheduler = eval(args.scheduler)(model)
    except:
        raise RuntimeError(f'Scheduler {args.scheduler} not available!')
    # optionally resume from a checkpoint:
    # --model_file loads weights only; --resume also restores epoch/best score
    # (and wins if both flags are given, since it is checked last).
    reset_epoch = True
    pretrained_file = None
    if args.model_file:
        reset_epoch = True
        pretrained_file = args.model_file
    if args.resume:
        reset_epoch = False
        pretrained_file = f'{model_out_dir}/{args.resume}'
    if pretrained_file and os.path.isfile(pretrained_file):
        # load checkpoint weights and update model and optimizer
        if args.can_print:
            log.write(f'>> Loading checkpoint:\n>> {pretrained_file}\n')
        checkpoint = torch.load(pretrained_file)
        if not reset_epoch:
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_score = checkpoint['best_score']
        # `.module` because the network is wrapped in (Distributed)DataParallel
        model.module.load_state_dict(checkpoint['state_dict'])
        if args.can_print:
            if reset_epoch:
                log.write(f'>>>> loaded checkpoint:\n>>>> {pretrained_file}\n')
            else:
                log.write(
                    f'>>>> loaded checkpoint:\n>>>> {pretrained_file} (epoch {checkpoint["epoch"]:.2f})\n'
                )
    else:
        if args.can_print:
            log.write(f'>> No checkpoint found at {pretrained_file}\n')
    # Data loading code
    # Resolve the augmentation function by name (e.g. train_multi_augment2) —
    # presumably defined/imported at module level; verify for each aug_version.
    train_transform = eval(f'train_multi_augment{args.aug_version}')
    train_split_file = f'{DATA_DIR}/split/{args.split_type}/random_train_cv0.csv'
    valid_split_file = f'{DATA_DIR}/split/{args.split_type}/random_valid_cv0.csv'
    train_dataset = RetrievalDataset(
        args,
        train_split_file,
        transform=train_transform,
        data_type='train',
    )
    valid_dataset = RetrievalDataset(
        args,
        valid_split_file,
        transform=None,
        data_type='valid',
    )
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
        valid_sampler = torch.utils.data.distributed.DistributedSampler(
            valid_dataset)
    else:
        train_sampler = RandomSampler(train_dataset)
        valid_sampler = SequentialSampler(valid_dataset)
    train_loader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=image_collate,
    )
    valid_loader = DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=image_collate,
    )
    train(args, train_loader, valid_loader, model, criterion, scheduler, log,
          best_epoch, best_score, start_epoch, model_out_dir)
def _setup_model(self, model: Module) -> DataParallel: """Wraps the given model into a :class:`~torch.nn.parallel.DataParallel` module.""" return DataParallel(module=model, device_ids=self.parallel_devices)
def run_experiment_VAE(
        run_name,
        out_dir='./results',
        seed=42,
        # Training params
        bs_train=128,
        bs_test=None,
        batches=100,
        epochs=100,
        early_stopping=3,
        checkpoints=None,
        lr=1e-3,
        RTplot=False,
        print_every=100,
        # Model params
        h_dim=256,
        z_dim=5,
        x_sigma2=0.9,
        betas=(0.9, 0.999),
        **kw):
    """
    Execute a single run of experiment 1 with a single configuration.

    Downloads the dataset, builds a CNN encoder/decoder VAE, trains it with
    Adam and saves the fit results (plus all config parameters) to disk.

    :param run_name: The name of the run and output file to create.
    :param out_dir: Where to write the output to.
    :param seed: Torch RNG seed for reproducibility.
    :param bs_train: Training batch size (also used for the test loader below).
    :param h_dim: Encoder feature dimension.
    :param z_dim: Latent dimension of the VAE.
    :param x_sigma2: Data-likelihood variance used in the VAE loss.
    :return: dict with the last train/test losses.
    """
    torch.manual_seed(seed)
    if not bs_test:
        bs_test = max([bs_train // 4, 1])
    # Snapshot of all configuration arguments (must stay before other locals
    # are defined, since locals() captures everything in scope).
    cfg = locals()
    DATA_DIR = pathlib.Path.home().joinpath('.pytorch-datasets')
    _, dataset_dir = cs236605.download.download_data(out_path=DATA_DIR,
                                                     url=DATA_URL,
                                                     extract=True,
                                                     force=False)
    im_size = 64
    tf = T.Compose([
        # Resize to constant spatial dimensions
        T.Resize((im_size, im_size)),
        # PIL.Image -> torch.Tensor
        T.ToTensor(),
        # Dynamic range [0,1] -> [-1, 1]
        T.Normalize(mean=(.5, .5, .5), std=(.5, .5, .5)),
    ])
    ds_gwb = ImageFolder(os.path.dirname(dataset_dir), tf)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # TODO: Train
    # - Create model, loss, optimizer and trainer based on the parameters.
    #   Use the model you've implemented previously, cross entropy loss and
    #   any optimizer that you wish.
    # - Run training and save the FitResults in the fit_res variable.
    # - The fit results and all the experiment parameters will then be saved
    #   for you automatically.
    fit_res = None
    # ====== YOUR CODE: ======
    # Data
    # NOTE(review): int() truncation can make the two lengths sum to less than
    # len(ds_gwb); torch's random_split requires an exact partition — verify.
    split_lengths = [int(len(ds_gwb) * 0.9), int(len(ds_gwb) * 0.1)]
    ds_train, ds_test = random_split(ds_gwb, split_lengths)
    dl_train = DataLoader(ds_train, bs_train, shuffle=True)
    dl_test = DataLoader(ds_test, bs_train, shuffle=True)  # note: bs_train, not bs_test
    im_size = ds_train[0][0].shape  # rebound: now a CxHxW shape, not the int 64

    # Model
    encoder = autoencoder.EncoderCNN(in_channels=im_size[0],
                                     out_channels=h_dim)
    decoder = autoencoder.DecoderCNN(in_channels=h_dim,
                                     out_channels=im_size[0])
    vae = autoencoder.VAE(encoder, decoder, im_size, z_dim)
    vae_dp = DataParallel(vae).to(device)
    print(vae)

    # Optimizer
    optimizer = optim.Adam(vae.parameters(), lr=lr, betas=betas)

    # Loss
    def loss_fn(x, xr, z_mu, z_log_sigma2):
        return autoencoder.vae_loss(x, xr, z_mu, z_log_sigma2, x_sigma2)

    def post_epoch_fn(epoch, train_result, test_result, verbose):
        # Plot some samples if this is a verbose epoch
        if verbose:
            samples = vae.sample(n=5)
            fig, _ = plot.tensors_as_images(samples, figsize=(6, 2))
            if RTplot:
                IPython.display.display(fig)
            else:
                name = run_name + '_Ep_' + str(epoch)
                # NOTE(review): plain string concat — with out_dir='./results'
                # this writes './results<name>.png'; os.path.join was probably
                # intended.
                fig.savefig(out_dir + name + '.png')
                plt.close(fig)

    # Trainer
    trainer = VAETrainer(vae_dp, loss_fn, optimizer, device)
    checkpoint_file = 'checkpoints/vae'
    checkpoint_file_final = f'{checkpoint_file}_final'
    # Start from a clean checkpoint for every run.
    if os.path.isfile(f'{checkpoint_file}.pt'):
        os.remove(f'{checkpoint_file}.pt')
    # NOTE(review): early_stopping=20 is hard-coded here and shadows the
    # `early_stopping` parameter (default 3) — confirm which is intended.
    fit_res = trainer.fit(dl_train,
                          dl_test,
                          num_epochs=epochs,
                          early_stopping=20,
                          print_every=print_every,
                          checkpoints=checkpoint_file,
                          post_epoch_fn=post_epoch_fn)
    last_train_loss = fit_res.train_loss[-1]
    last_test_loss = fit_res.test_loss[-1]
    # ========================
    save_experiment(run_name, out_dir, cfg, fit_res)
    return {'train': last_train_loss, 'test': last_test_loss}
def train_model(train_dataset, train_num_each, val_dataset, val_num_each):
    """Fine-tune the CNN-LSTM tool/phase model with an extra KL term that ties
    tool predictions to phase predictions through a learned 7x7 linear map.

    Loads pretrained multi_lstm weights plus a tool->phase matrix
    ('kl_fc_t2p.npy'), trains with three losses (tool BCE, phase CE, |KL|),
    tracks the best validation model and saves weights/records/matrix to disk.

    Relies on module-level globals: sequence_length, num_gpu, use_gpu,
    train_batch_size, val_batch_size, workers, epochs, optimizer_choice,
    multi_optim, learning_rate, momentum, dampening, weight_decay,
    use_nesterov, sgd_adjust_lr, sgd_step, sgd_gamma, crop_type, use_flip.
    Written against the legacy pre-0.4 PyTorch API (Variable, volatile,
    .data[0], size_average).
    """
    num_train = len(train_dataset)
    num_val = len(val_dataset)
    # Start indices of usable length-`sequence_length` clips within each video.
    train_useful_start_idx = get_useful_start_idx(sequence_length,
                                                 train_num_each)
    val_useful_start_idx = get_useful_start_idx(sequence_length, val_num_each)
    # Trim so the clip count divides evenly across the GPUs.
    num_train_we_use = len(train_useful_start_idx) // num_gpu * num_gpu
    num_val_we_use = len(val_useful_start_idx) // num_gpu * num_gpu
    # num_train_we_use = 800
    # num_val_we_use = 800
    train_we_use_start_idx = train_useful_start_idx[0:num_train_we_use]
    val_we_use_start_idx = val_useful_start_idx[0:num_val_we_use]
    # Expand each clip start into its consecutive frame indices; these index
    # lists are used directly as (order-preserving) samplers.
    train_idx = []
    for i in range(num_train_we_use):
        for j in range(sequence_length):
            train_idx.append(train_we_use_start_idx[i] + j)
    val_idx = []
    for i in range(num_val_we_use):
        for j in range(sequence_length):
            val_idx.append(val_we_use_start_idx[i] + j)
    num_train_all = len(train_idx)
    num_val_all = len(val_idx)
    print('num train start idx : {:6d}'.format(len(train_useful_start_idx)))
    print('last idx train start: {:6d}'.format(train_useful_start_idx[-1]))
    print('num of train dataset: {:6d}'.format(num_train))
    print('num of train we use : {:6d}'.format(num_train_we_use))
    print('num of all train use: {:6d}'.format(num_train_all))
    print('num valid start idx : {:6d}'.format(len(val_useful_start_idx)))
    print('last idx valid start: {:6d}'.format(val_useful_start_idx[-1]))
    print('num of valid dataset: {:6d}'.format(num_val))
    print('num of valid we use : {:6d}'.format(num_val_we_use))
    print('num of all valid use: {:6d}'.format(num_val_all))
    train_loader = DataLoader(train_dataset,
                              batch_size=train_batch_size,
                              sampler=train_idx,
                              num_workers=workers,
                              pin_memory=False)
    val_loader = DataLoader(val_dataset,
                            batch_size=val_batch_size,
                            sampler=val_idx,
                            num_workers=workers,
                            pin_memory=False)
    model = multi_lstm()
    model = DataParallel(model)
    # The checkpoint was saved from a DataParallel model, hence load after wrap.
    model.load_state_dict(
        torch.load(
            'cnn_lstm_epoch_25_length_4_opt_1_mulopt_1_flip_0_crop_1_batch_200_train1_9998_train2_9987_val1_9731_val2_8752.pth'
        ))
    # Learnable 7x7 tool->phase map, initialized from a precomputed matrix.
    kl_fc_t2p = nn.Linear(7, 7)
    all_tool_to_phase = np.load('kl_fc_t2p.npy')
    kl_fc_t2p.weight.data = torch.from_numpy(
        all_tool_to_phase.astype('float32'))
    for param in kl_fc_t2p.parameters():
        param.requires_grad = True
    if use_gpu:
        model = model.cuda()
        kl_fc_t2p = kl_fc_t2p.cuda()
    # NOTE(review): size_average is deprecated (use reduction='sum' on >=0.4).
    criterion_1 = nn.BCEWithLogitsLoss(size_average=False)  # tool presence (multi-label)
    criterion_2 = nn.CrossEntropyLoss(size_average=False)   # phase (7-way)
    criterion_3 = nn.KLDivLoss(size_average=False)          # tool->phase consistency
    softmax_cuda = nn.Softmax().cuda()
    sigmoid_cuda = nn.Sigmoid().cuda()
    if multi_optim == 0:
        # Single learning rate for everything.
        if optimizer_choice == 0:
            # NOTE(review): torch.optim expects an iterable of Tensors or of
            # dicts — a list of two .parameters() generators is rejected at
            # construction; list(model.parameters()) +
            # list(kl_fc_t2p.parameters()) would be the working form.
            optimizer = optim.SGD([model.parameters(), kl_fc_t2p.parameters()],
                                  lr=learning_rate,
                                  momentum=momentum,
                                  dampening=dampening,
                                  weight_decay=weight_decay,
                                  nesterov=use_nesterov)
            if sgd_adjust_lr == 0:
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                                       step_size=sgd_step,
                                                       gamma=sgd_gamma)
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(
                    optimizer, 'min')
        elif optimizer_choice == 1:
            optimizer = optim.Adam(
                [model.parameters(), kl_fc_t2p.parameters()],
                lr=learning_rate)
    elif multi_optim == 1:
        # Lower LR (lr/10) for the pretrained CNN trunk, full LR for LSTM/FC.
        if optimizer_choice == 0:
            optimizer = optim.SGD([
                {
                    'params': model.module.share.parameters()
                },
                {
                    'params': kl_fc_t2p.parameters()
                },
                {
                    'params': model.module.lstm.parameters(),
                    'lr': learning_rate
                },
                {
                    'params': model.module.fc.parameters(),
                    'lr': learning_rate
                },
            ],
                                  lr=learning_rate / 10,
                                  momentum=momentum,
                                  dampening=dampening,
                                  weight_decay=weight_decay,
                                  nesterov=use_nesterov)
            if sgd_adjust_lr == 0:
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                                       step_size=sgd_step,
                                                       gamma=sgd_gamma)
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(
                    optimizer, 'min')
        elif optimizer_choice == 1:
            optimizer = optim.Adam([
                {
                    'params': model.module.share.parameters()
                },
                {
                    'params': kl_fc_t2p.parameters()
                },
                {
                    'params': model.module.lstm.parameters(),
                    'lr': learning_rate
                },
                {
                    'params': model.module.fc.parameters(),
                    'lr': learning_rate
                },
            ],
                                   lr=learning_rate / 10)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_accuracy_1 = 0.0
    best_val_accuracy_2 = 0.0  # judge by accu2
    correspond_train_acc_1 = 0.0
    correspond_train_acc_2 = 0.0
    # Per-epoch record: 2 train accuracies, 3 train losses, 2 valid accuracies,
    # 3 valid losses. (The original note claimed 12 values; only 10 columns
    # are actually stored.)
    record_np = np.zeros([epochs, 10])
    for epoch in range(epochs):
        # np.random.seed(epoch)
        # Reshuffle clip starts each epoch and rebuild the frame-index sampler.
        np.random.shuffle(train_we_use_start_idx)
        train_idx = []
        for i in range(num_train_we_use):
            for j in range(sequence_length):
                train_idx.append(train_we_use_start_idx[i] + j)
        train_loader = DataLoader(train_dataset,
                                  batch_size=train_batch_size,
                                  sampler=train_idx,
                                  num_workers=workers,
                                  pin_memory=False)
        model.train()
        train_loss_1 = 0.0
        train_loss_2 = 0.0
        train_loss_3 = 0.0
        train_corrects_1 = 0
        train_corrects_2 = 0
        train_start_time = time.time()
        for data in train_loader:
            inputs, labels_1, labels_2 = data  # frames, tool labels, phase labels
            if use_gpu:
                inputs = Variable(inputs.cuda())
                labels_1 = Variable(labels_1.cuda())
                labels_2 = Variable(labels_2.cuda())
            else:
                inputs = Variable(inputs)
                labels_1 = Variable(labels_1)
                labels_2 = Variable(labels_2)
            optimizer.zero_grad()
            outputs_1, outputs_2 = model.forward(inputs)  # tool logits, phase logits
            sig_output_1 = sigmoid_cuda(outputs_1)
            soft_output_2 = softmax_cuda(outputs_2)
            # Detach the probability tensors: the KL term trains kl_fc_t2p only,
            # not the backbone, through this path.
            sig_output_1 = Variable(sig_output_1.data, requires_grad=False)
            soft_output_2 = Variable(soft_output_2.data, requires_grad=False)
            kl_output_1 = kl_fc_t2p(sig_output_1)
            # NOTE(review): torch.cuda.ByteTensor assumes use_gpu is True.
            preds_1 = torch.cuda.ByteTensor(sig_output_1.data > 0.5)
            preds_1 = preds_1.long()
            train_corrects_1 += torch.sum(preds_1 == labels_1.data)
            labels_1 = Variable(labels_1.data.float())  # BCE target must be float
            loss_1 = criterion_1(outputs_1, labels_1)
            loss_2 = criterion_2(outputs_2, labels_2)
            _, preds_2 = torch.max(outputs_2.data, 1)
            train_corrects_2 += torch.sum(preds_2 == labels_2.data)
            # NOTE(review): KLDivLoss expects log-probabilities as its input;
            # kl_output_1 is a raw linear output — presumably deliberate given
            # the abs() wrapper, but verify.
            loss_3 = torch.abs(criterion_3(kl_output_1, soft_output_2))
            loss = loss_1 + loss_2 + loss_3
            loss.backward()
            optimizer.step()
            train_loss_1 += loss_1.data[0]  # legacy scalar access (.item() on >=0.4)
            train_loss_2 += loss_2.data[0]
            train_loss_3 += loss_3.data[0]
        train_elapsed_time = time.time() - train_start_time
        # accuracy_1 is per tool channel (7 channels per frame)
        train_accuracy_1 = train_corrects_1 / num_train_all / 7
        train_accuracy_2 = train_corrects_2 / num_train_all
        train_average_loss_1 = train_loss_1 / num_train_all / 7
        train_average_loss_2 = train_loss_2 / num_train_all
        train_average_loss_3 = train_loss_3 / num_train_all
        # begin eval
        model.eval()
        val_loss_1 = 0.0
        val_loss_2 = 0.0
        val_loss_3 = 0.0
        val_corrects_1 = 0
        val_corrects_2 = 0
        val_start_time = time.time()
        for data in val_loader:
            inputs, labels_1, labels_2 = data
            # Phase is judged once per clip: keep only the last frame's label.
            labels_2 = labels_2[(sequence_length - 1)::sequence_length]
            if use_gpu:
                inputs = Variable(inputs.cuda(), volatile=True)
                labels_1 = Variable(labels_1.cuda(), volatile=True)
                labels_2 = Variable(labels_2.cuda(), volatile=True)
            else:
                inputs = Variable(inputs, volatile=True)
                labels_1 = Variable(labels_1, volatile=True)
                labels_2 = Variable(labels_2, volatile=True)
            # crop_type 5/10: multi-crop TTA — average predictions over crops.
            if crop_type == 0 or crop_type == 1:
                outputs_1, outputs_2 = model.forward(inputs)
            elif crop_type == 5:
                inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
                inputs = inputs.view(-1, 3, 224, 224)
                outputs_1, outputs_2 = model.forward(inputs)
                outputs_1 = outputs_1.view(5, -1, 7)
                outputs_1 = torch.mean(outputs_1, 0)
                outputs_2 = outputs_2.view(5, -1, 7)
                outputs_2 = torch.mean(outputs_2, 0)
            elif crop_type == 10:
                inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
                inputs = inputs.view(-1, 3, 224, 224)
                outputs_1, outputs_2 = model.forward(inputs)
                outputs_1 = outputs_1.view(10, -1, 7)
                outputs_1 = torch.mean(outputs_1, 0)
                outputs_2 = outputs_2.view(10, -1, 7)
                outputs_2 = torch.mean(outputs_2, 0)
            sig_output_1 = sigmoid_cuda(outputs_1)
            soft_output_2 = softmax_cuda(outputs_2)
            sig_output_1 = Variable(sig_output_1.data, requires_grad=False)
            soft_output_2 = Variable(soft_output_2.data, requires_grad=False)
            kl_output_1 = (kl_fc_t2p(sig_output_1))
            outputs_2 = outputs_2[sequence_length - 1::sequence_length]
            _, preds_2 = torch.max(outputs_2.data, 1)
            preds_1 = torch.cuda.ByteTensor(sig_output_1.data > 0.5)
            preds_1 = preds_1.long()
            val_corrects_1 += torch.sum(preds_1 == labels_1.data)
            labels_1 = Variable(labels_1.data.float())
            loss_1 = criterion_1(outputs_1, labels_1)
            loss_2 = criterion_2(outputs_2, labels_2)
            val_corrects_2 += torch.sum(preds_2 == labels_2.data)
            loss_3 = torch.abs(criterion_3(kl_output_1, soft_output_2))
            val_loss_1 += loss_1.data[0]
            val_loss_2 += loss_2.data[0]
            val_loss_3 += loss_3.data[0]
        val_elapsed_time = time.time() - val_start_time
        val_accuracy_1 = val_corrects_1 / (num_val_all * 7)
        val_accuracy_2 = val_corrects_2 / num_val_we_use
        val_average_loss_1 = val_loss_1 / (num_val_all * 7)
        val_average_loss_2 = val_loss_2 / num_val_we_use
        val_average_loss_3 = val_loss_3 / num_val_all
        print('epoch: {:3d}'
              ' train time: {:2.0f}m{:2.0f}s'
              ' train accu_1: {:.4f}'
              ' train accu_2: {:.4f}'
              ' train loss_1: {:4.4f}'
              ' train loss_2: {:4.4f}'
              ' train loss_3: {:4.4f}'.format(
                  epoch, train_elapsed_time // 60, train_elapsed_time % 60,
                  train_accuracy_1, train_accuracy_2, train_average_loss_1,
                  train_average_loss_2, train_average_loss_3))
        print('epoch: {:3d}'
              ' valid time: {:2.0f}m{:2.0f}s'
              ' valid accu_1: {:.4f}'
              ' valid accu_2: {:.4f}'
              ' valid loss_1: {:4.4f}'
              ' valid loss_2: {:4.4f}'
              ' valid loss_3: {:4.4f}'.format(epoch, val_elapsed_time // 60,
                                              val_elapsed_time % 60,
                                              val_accuracy_1, val_accuracy_2,
                                              val_average_loss_1,
                                              val_average_loss_2,
                                              val_average_loss_3))
        if optimizer_choice == 0:
            if sgd_adjust_lr == 0:
                exp_lr_scheduler.step()
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler.step(val_average_loss_1 + val_average_loss_2)
        # Model selection: maximize phase accuracy subject to tool accuracy
        # > 0.95; ties broken by tool accuracy, then by train accuracies.
        if val_accuracy_2 > best_val_accuracy_2 and val_accuracy_1 > 0.95:
            best_val_accuracy_2 = val_accuracy_2
            best_val_accuracy_1 = val_accuracy_1
            correspond_train_acc_1 = train_accuracy_1
            correspond_train_acc_2 = train_accuracy_2
            best_model_wts = copy.deepcopy(model.state_dict())
        elif val_accuracy_2 == best_val_accuracy_2 and val_accuracy_1 > 0.95:
            if val_accuracy_1 > best_val_accuracy_1:
                correspond_train_acc_1 = train_accuracy_1
                correspond_train_acc_2 = train_accuracy_2
                best_model_wts = copy.deepcopy(model.state_dict())
            elif val_accuracy_1 == best_val_accuracy_1:
                if train_accuracy_2 > correspond_train_acc_2:
                    correspond_train_acc_2 = train_accuracy_2
                    correspond_train_acc_1 = train_accuracy_1
                    best_model_wts = copy.deepcopy(model.state_dict())
                elif train_accuracy_2 == correspond_train_acc_2:
                    # NOTE(review): comparing train_accuracy_1 against
                    # best_val_accuracy_1 looks like a typo for
                    # correspond_train_acc_1 — confirm before changing.
                    if train_accuracy_1 > best_val_accuracy_1:
                        correspond_train_acc_1 = train_accuracy_1
                        best_model_wts = copy.deepcopy(model.state_dict())
        record_np[epoch, 0] = train_accuracy_1
        record_np[epoch, 1] = train_accuracy_2
        record_np[epoch, 2] = train_average_loss_1
        record_np[epoch, 3] = train_average_loss_2
        record_np[epoch, 4] = train_average_loss_3
        record_np[epoch, 5] = val_accuracy_1
        record_np[epoch, 6] = val_accuracy_2
        record_np[epoch, 7] = val_average_loss_1
        record_np[epoch, 8] = val_average_loss_2
        record_np[epoch, 9] = val_average_loss_3
    print('best accuracy_1: {:.4f} cor train accu_1: {:.4f}'.format(
        best_val_accuracy_1, correspond_train_acc_1))
    print('best accuracy_2: {:.4f} cor train accu_2: {:.4f}'.format(
        best_val_accuracy_2, correspond_train_acc_2))
    # Encode the headline metrics (scaled to 4 digits) into the output names.
    save_val_1 = int("{:4.0f}".format(best_val_accuracy_1 * 10000))
    save_val_2 = int("{:4.0f}".format(best_val_accuracy_2 * 10000))
    save_train_1 = int("{:4.0f}".format(correspond_train_acc_1 * 10000))
    save_train_2 = int("{:4.0f}".format(correspond_train_acc_2 * 10000))
    public_name = "cnn_lstm_klt2p" \
                  + "_epoch_" + str(epochs) \
                  + "_length_" + str(sequence_length) \
                  + "_opt_" + str(optimizer_choice) \
                  + "_mulopt_" + str(multi_optim) \
                  + "_flip_" + str(use_flip) \
                  + "_crop_" + str(crop_type) \
                  + "_batch_" + str(train_batch_size) \
                  + "_train1_" + str(save_train_1) \
                  + "_train2_" + str(save_train_2) \
                  + "_val1_" + str(save_val_1) \
                  + "_val2_" + str(save_val_2)
    model_name = public_name + ".pth"
    torch.save(best_model_wts, model_name)
    record_name = public_name + ".npy"
    np.save(record_name, record_np)
    # Also persist the learned tool->phase matrix.
    kl_fc_t2p_name = public_name + "t2p.npy"
    kl_fc_t2p_np = kl_fc_t2p.cpu().weight.data.numpy()
    np.save(kl_fc_t2p_name, kl_fc_t2p_np)
def test_model(test_dataset, test_num_each):
    """Evaluate the phase-recognition ResNet on the test set and pickle the
    per-frame predictions.

    Relies on module-level globals: test_batch_size, workers, use_gpu,
    crop_type, model_name, model_pure_name, pure_resnet. Written against the
    legacy pre-0.4 PyTorch API (Variable, volatile, .data[0], size_average).
    """
    num_test = len(test_dataset)
    # Sequential index list used directly as an order-preserving sampler.
    test_idx = [i for i in range(num_test)]
    print('num of test dataset: {:6d}'.format(num_test))
    test_loader = DataLoader(
        test_dataset,
        batch_size=test_batch_size,
        sampler=test_idx,
        num_workers=workers,
        pin_memory=False
    )
    model = pure_resnet()
    model = DataParallel(model)
    # The checkpoint was saved from a DataParallel model, hence load after wrap.
    model.load_state_dict(torch.load(model_name))
    if use_gpu:
        model = model.cuda()
    # NOTE(review): size_average is deprecated (use reduction='sum' on >=0.4).
    criterion = nn.CrossEntropyLoss(size_average=False)
    model.eval()
    test_loss = 0.0
    test_corrects = 0
    all_preds = []
    test_start_time = time.time()
    for data in test_loader:
        inputs, labels_1, labels_2 = data  # labels_1 (tool labels) unused here
        if use_gpu:
            inputs = Variable(inputs.cuda(), volatile=True)
            labels = Variable(labels_2.cuda(), volatile=True)
        else:
            inputs = Variable(inputs, volatile=True)
            labels = Variable(labels_2, volatile=True)
        # crop_type 5/10: multi-crop TTA — average the logits over the crops.
        if crop_type == 0 or crop_type == 1:
            outputs = model.forward(inputs)
        elif crop_type == 5:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs = model.forward(inputs)
            outputs = outputs.view(5, -1, 7)
            outputs = torch.mean(outputs, 0)
        elif crop_type == 10:
            inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
            inputs = inputs.view(-1, 3, 224, 224)
            outputs = model.forward(inputs)
            outputs = outputs.view(10, -1, 7)
            outputs = torch.mean(outputs, 0)
        _, preds = torch.max(outputs.data, 1)
        for i in range(len(preds)):
            all_preds.append(preds[i])
        loss = criterion(outputs, labels)
        test_loss += loss.data[0]  # legacy scalar access (.item() on >=0.4)
        test_corrects += torch.sum(preds == labels.data)
        # print(test_corrects)
    test_elapsed_time = time.time() - test_start_time
    test_accuracy = test_corrects / num_test
    test_average_loss = test_loss / num_test
    # Encode the accuracy (scaled to 4 digits) into the predictions file name.
    save_test = int("{:4.0f}".format(test_accuracy * 10000))
    pred_name = model_pure_name + '_test_' + str(save_test) + '_crop_' + str(crop_type) + '.pkl'
    with open(pred_name, 'wb') as f:
        pickle.dump(all_preds, f)
    print('test elapsed: {:2.0f}m{:2.0f}s'
          ' test loss: {:4.4f}'
          ' test accu: {:.4f}'
          .format(test_elapsed_time // 60,
                  test_elapsed_time % 60,
                  test_average_loss,
                  test_accuracy))
def train(self):
    """Train the keyword-conditioned GPT-2 model.

    Builds or loads GPT2KWModel, optionally enables fp16 via apex and
    multi-GPU via DataParallel, then runs the epoch loop with gradient
    accumulation and periodic logging. Checkpoints every 50 epochs and
    saves the final model to `<output_dir>/final_model`.
    """
    if not self.pretrained_model:
        model = GPT2KWModel(config=self.model_config)
    else:
        model = GPT2KWModel.from_pretrained(self.pretrained_model)
    model.train()
    model.to(self.device)
    # Count model parameters (for the log only).
    num_parameters = 0
    parameters = model.parameters()
    for parameter in parameters:
        num_parameters += parameter.numel()
    self.print_and_log('模型参数量: {}'.format(num_parameters))
    self.print_and_log("开始加载训练集")
    train_loader = self.create_dataloader()
    self.print_and_log("训练集加载完毕")
    # One optimizer step per `accumulation_steps` batches.
    # NOTE(review): this uses self.accumulation_steps while the loop below
    # uses self.gradient_accumulation — confirm they hold the same value.
    epoch_steps = int(train_loader.sampler.num_samples / self.batch_size / self.accumulation_steps)
    total_steps = epoch_steps * self.epochs
    self.print_and_log('epoch 步数 = {}'.format(epoch_steps))
    self.print_and_log('总步数 = {}'.format(total_steps))
    optimizer = pytorch_transformers.AdamW(model.parameters(), lr=self.lr, correct_bias=True)
    # Linear warmup for warmup_steps, then linear decay over total_steps.
    scheduler = pytorch_transformers.WarmupLinearSchedule(optimizer, warmup_steps=self.warmup_steps, t_total=total_steps)
    if self.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=self.fp16_opt_level)
    if torch.cuda.device_count() > 1:
        model = DataParallel(model)
        multi_gpu = True
    else:
        multi_gpu = False
    overall_step = 0
    for epoch in range(self.epochs):
        self.print_and_log('epoch {}'.format(epoch + 1))
        now = datetime.now()
        self.print_and_log('time: {}'.format(now))
        optimizer.zero_grad()
        running_loss = 0
        for i, batch_data in enumerate(train_loader):
            if torch.cuda.is_available():
                keyword_ids = batch_data[0].to(self.device, non_blocking=True)
                passage_ids = batch_data[1].to(self.device, non_blocking=True)
                label_ids = passage_ids.clone().to(self.device, non_blocking=True)
            else:
                keyword_ids = batch_data[0]
                passage_ids = batch_data[1]
                label_ids = passage_ids.clone()
            # LM objective: the labels are the passage ids themselves.
            outputs = model(input_ids=passage_ids, keyword_ids=keyword_ids, labels=label_ids)
            loss, logits = outputs[:2]
            if multi_gpu:
                loss = loss.mean()  # DataParallel returns one loss per GPU
            if self.gradient_accumulation > 1:
                loss = loss / self.gradient_accumulation
            # loss backward
            if self.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), self.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), self.max_grad_norm)
            # Step only on accumulation boundaries.
            if (i + 1) % self.gradient_accumulation == 0:
                running_loss += loss.item()
                # NOTE(review): since PyTorch 1.1 scheduler.step() should be
                # called after optimizer.step() — confirm the library versions
                # in use before reordering.
                scheduler.step()
                optimizer.step()
                optimizer.zero_grad()
                overall_step += 1
                #if (overall_step + 1) % self.log_step == 0:
                #    self.tb_writer.add_scalar('loss', loss.item(), overall_step)
                if (overall_step + 1) % self.log_step == 0 and running_loss != 0:
                    self.print_and_log('now time: {}:{}. Step {} of epoch {}, loss {}'.format(
                        datetime.now().hour, datetime.now().minute, overall_step + 1, epoch + 1,
                        running_loss * self.gradient_accumulation / self.log_step))
                    running_loss = 0
        if (epoch + 1) % 50 == 0:
            # Periodic checkpoint every 50 epochs.
            if not os.path.exists(self.output_dir + 'model_epoch{}'.format(epoch + 1)):
                os.makedirs(self.output_dir + 'model_epoch{}'.format(epoch + 1))
            # Unwrap DataParallel before saving so the weights keep plain names.
            model_to_save = model.module if hasattr(model, 'module') else model
            model_to_save.save_pretrained(self.output_dir + 'model_epoch{}'.format(epoch + 1))
            # torch.save(scheduler.state_dict(), output_dir + 'model_epoch{}/scheduler.pt'.format(epoch + 1))
            # torch.save(optimizer.state_dict(), output_dir + 'model_epoch{}/optimizer.pt'.format(epoch + 1))
        then = datetime.now()
        self.print_and_log('time: {}'.format(then))
        self.print_and_log('time for one epoch: {}'.format(then - now))
    self.print_and_log('training finished')
    self.f_log.close()
    if not os.path.exists(self.output_dir + 'final_model'):
        os.makedirs(self.output_dir + 'final_model')
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(self.output_dir + 'final_model')
def val(
    net,
    dataloader,
    epoch,
    val_prefix="val",
    use_camera_intrinsic=False,
    use_motion=False,
):
    """Run one validation pass of the pose network.

    Accumulates segmentation/vertex losses and precision/recall into the
    module-level `recs` recorders; on eval epochs also runs PnP-based pose
    evaluation and records projection-error/ADD/cm metrics.

    Relies on module-level globals: recs, recs_names, recorder, train_cfg,
    args, vote_type, Evaluator, EvalWrapper, MotionEvalWrapper,
    UncertaintyEvalWrapper, save_pickle, imagenet_to_uint8.
    """
    for rec in recs:
        rec.reset()
    test_begin = time.time()
    evaluator = Evaluator()
    # Voting-based keypoint extraction, wrapped for multi-GPU execution.
    eval_net = (DataParallel(EvalWrapper().cuda())
                if not use_motion else DataParallel(MotionEvalWrapper().cuda()))
    uncertain_eval_net = DataParallel(UncertaintyEvalWrapper().cuda())
    net.eval()
    for idx, data in enumerate(dataloader):
        if use_camera_intrinsic:
            # Batch additionally carries per-sample camera matrices Ks.
            image, mask, vertex, vertex_weights, pose, corner_target, Ks = [
                d.cuda() for d in data
            ]
        else:
            image, mask, vertex, vertex_weights, pose, corner_target = [
                d.cuda() for d in data
            ]
        with torch.no_grad():
            seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
                image, mask, vertex, vertex_weights)
            # Reduce per-GPU values to scalars.
            loss_seg, loss_vertex, precision, recall = [
                torch.mean(val)
                for val in (loss_seg, loss_vertex, precision, recall)
            ]
            # Full pose evaluation only on designated eval epochs (or in test mode).
            if (train_cfg["eval_epoch"]
                    and epoch % train_cfg["eval_inter"] == 0
                    and epoch >= train_cfg["eval_epoch_begin"]) or args.test_model:
                if args.use_uncertainty_pnp:
                    # Keypoint means + inverse covariances for uncertainty PnP.
                    mean, cov_inv = uncertain_eval_net(seg_pred, vertex_pred)
                    mean = mean.cpu().numpy()
                    cov_inv = cov_inv.cpu().numpy()
                else:
                    corner_pred = eval_net(seg_pred,
                                           vertex_pred).cpu().detach().numpy()
                pose = pose.cpu().numpy()

                b = pose.shape[0]
                pose_preds = []
                for bi in range(b):
                    intri_type = "use_intrinsic" if use_camera_intrinsic else "linemod"
                    K = Ks[bi].cpu().numpy() if use_camera_intrinsic else None
                    if args.use_uncertainty_pnp:
                        pose_preds.append(
                            evaluator.evaluate_uncertainty(
                                mean[bi],
                                cov_inv[bi],
                                pose[bi],
                                args.linemod_cls,
                                intri_type,
                                vote_type,
                                intri_matrix=K,
                            ))
                    else:
                        pose_preds.append(
                            evaluator.evaluate(
                                corner_pred[bi],
                                pose[bi],
                                args.linemod_cls,
                                intri_type,
                                vote_type,
                                intri_matrix=K,
                            ))

                if args.save_inter_result:
                    # NOTE(review): pose_preds is only populated in this eval
                    # branch — save_inter_result outside an eval epoch would
                    # hit an undefined name; confirm the flags are always
                    # used together.
                    mask_pr = torch.argmax(seg_pred, 1).cpu().detach().numpy()
                    mask_gt = mask.cpu().detach().numpy()
                    # assume batch size = 1
                    imsave(
                        os.path.join(args.save_inter_dir,
                                     "{}_mask_pr.png".format(idx)),
                        mask_pr[0],
                    )
                    imsave(
                        os.path.join(args.save_inter_dir,
                                     "{}_mask_gt.png".format(idx)),
                        mask_gt[0],
                    )
                    imsave(
                        os.path.join(args.save_inter_dir,
                                     "{}_rgb.png".format(idx)),
                        imagenet_to_uint8(image.cpu().detach().numpy()[0]),
                    )
                    save_pickle(
                        [pose_preds[0], pose[0]],
                        os.path.join(args.save_inter_dir,
                                     "{}_pose.pkl".format(idx)),
                    )

            vals = [loss_seg, loss_vertex, precision, recall]
            for rec, val in zip(recs, vals):
                rec.update(val)

    # Visualize the last batch's predictions in the recorder (e.g. TensorBoard).
    with torch.no_grad():
        batch_size = image.shape[0]
        nrow = 5 if batch_size > 5 else batch_size
        recorder.rec_segmentation(
            F.softmax(seg_pred, dim=1),
            num_classes=2,
            nrow=nrow,
            step=epoch,
            name="{}/image/seg".format(val_prefix),
        )
        recorder.rec_vertex(
            vertex_pred,
            vertex_weights,
            nrow=4,
            step=epoch,
            name="{}/image/ver".format(val_prefix),
        )

    losses_batch = OrderedDict()
    for name, rec in zip(recs_names, recs):
        losses_batch["{}/".format(val_prefix) + name] = rec.avg
    if (train_cfg["eval_epoch"]
            and epoch % train_cfg["eval_inter"] == 0
            and epoch >= train_cfg["eval_epoch_begin"]) or args.test_model:
        proj_err, add, cm = evaluator.average_precision(False)
        losses_batch["{}/scalar/projection_error".format(
            val_prefix)] = proj_err
        losses_batch["{}/scalar/add".format(val_prefix)] = add
        losses_batch["{}/scalar/cm".format(val_prefix)] = cm
    recorder.rec_loss_batch(losses_batch, epoch, epoch, val_prefix)
    for rec in recs:
        rec.reset()

    print("epoch {} {} cost {} s".format(epoch, val_prefix,
                                         time.time() - test_begin))
def deepMain():
    """Run gaze-following inference over 'video.mp4' frame by frame.

    Detects faces with an LBP cascade, estimates head pose, feeds the face
    centre + pose to the GazeNet model, and shows the annotated frame next to
    the previous frame. Side effects only (windows/video I/O); returns None.
    Press 'q' to quit.
    """
    # from deepface.detectors.detector_ssd import FaceDetectorSSDMobilenetV2
    net = GazeNet()
    net = DataParallel(net)
    net.cuda()

    # face_detector = FaceDetectorSSDMobilenetV2()
    # LBP cascade detector: returns (x, y, w, h) rectangles only.
    face_detector = cv2.CascadeClassifier(
        'model/lbpcascade_frontalface_improved.xml')

    # Load pretrained gaze following model, keeping only matching keys.
    pretrained_dict = torch.load('model/epoch_15_loss_0.0558342523873.pkl')
    model_dict = net.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    net.load_state_dict(model_dict)

    cap = cv2.VideoCapture('video.mp4')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    videoFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    print(width, height, videoFrames, fps)

    # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.
    # out = cv2.VideoWriter('inspecaoCarroResult.avi',cv2.VideoWriter_fourcc('M','J','P','G'), fps, (width, height))
    print('Iniciando processamento do video...')
    prev_frame = None  # previous annotated frame, shown side by side
    while True:
        frameId = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        if frameId >= videoFrames:
            break
        print(frameId, '/', videoFrames)
        ret, img = cap.read()
        if not ret:  # guard against decode failure before the end of stream
            break
        originalImg = img
        img = cv2.resize(img, (640, 480))
        height, width, _ = img.shape
        faces = face_detector.detectMultiScale(img, 1.05, 3)
        for face in faces:
            print(face)
            # Precisa redimensionar imagem para aumentar o crop!
            formatDetected = [(face[1], face[1] + face[2]),
                              (face[0], face[3] + face[0])]
            print(formatDetected)
            x_, y_, w_, h_ = face[0], face[1], face[2], face[3]
            # BUG FIX: cascade detections carry no confidence, so the original
            # `score = face.score` raised AttributeError; removed (unused).
            x, y = int(x_ + w_ / 2), int(y_ + h_ / 2)
            print(x, y)
            cv2.rectangle(img, (x_, y_), (x_ + w_, y_ + h_), (0, 0, 255), 2)

            # Head pose estimation.
            # BUG FIX: the original assigned yaw to `y`, clobbering the face
            # centre and later passing an ndarray to cv2.circle.
            faceBbox = x_, y_, x_ + w_, y_ + h_
            yaw, p, r = headpose.detectHeadPose(img, faceBbox)

            # To normalize detections we fit with min and max angular examples
            # from the dataset, then take the scaled value of our sample.
            MinMaxY = [-86.73414612, 87.62715149]
            MinMaxP = [-54.0966301, 36.50032043]
            MinMaxR = [-42.67565918, 42.16217041]
            MinMaxY.append(yaw)
            MinMaxP.append(p)
            MinMaxR.append(r)
            yaw = min_max_scaler.fit_transform(
                np.array(MinMaxY).reshape((-1, 1)))
            p = min_max_scaler.fit_transform(
                np.array(MinMaxP).reshape((-1, 1)))
            r = min_max_scaler.fit_transform(
                np.array(MinMaxR).reshape((-1, 1)))
            headPoseAngles = float(yaw[-1]), float(p[-1]), float(r[-1])
            print('Head pose normalized: ', headPoseAngles)

            # BUG FIX: `eye_center` was never defined in the original (NameError);
            # use the face-box centre as a proxy — TODO confirm against the
            # eye detector this code was adapted from.
            eye_center = (x, y)
            center_x = eye_center[0] / width
            center_y = eye_center[1] / height
            img = cv2.circle(img, (x, y), 2, (255, 0, 255), thickness=2)  # Magento
            heatmap, p_x, p_y = test(net, originalImg, (x, y), headPoseAngles)
            img = cv2.circle(img, (int(p_x * width), int(p_y * height)), 2,
                             (255, 0, 0), thickness=2)  # Azul
            img = draw_result(img, (center_x, center_y), heatmap, (p_x, p_y))

        # BUG FIX: the original did `img = np.concatenate((img, img2)); img2 = img`,
        # which raised NameError on the first frame and grew the buffer
        # unboundedly afterwards. Show current frame next to the previous one.
        display = img if prev_frame is None else np.concatenate(
            (img, prev_frame), axis=1)
        prev_frame = img
        # Write the frame into the file 'output.avi'
        # display = cv2.resize(display, (1280, 480))
        # out.write(display)
        cv2.imshow('result', display)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    # out.release()
    cv2.destroyAllWindows()
def main():
    """Entry point: set up logging/device/tokenizer, preprocess the corpus,
    then train and evaluate a GPT-2 dialogue (or MMI) model.
    """
    args = setup_train_args()
    # Log to both a file and the console.
    global logger
    logger = create_logger(args)
    # Use the GPU when the user asks for it and one is available.
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    device = 'cuda' if args.cuda else 'cpu'
    logger.info('using device:{}'.format(device))
    # Seed the RNGs so good results are reproducible.
    # NOTE(review): presumably set_random_seed also seeds CUDA; when using
    # several GPUs, torch.cuda.manual_seed_all() should be used — verify.
    if args.seed:
        set_random_seed(args)
    # Select which GPUs are visible for training.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    # Initialize the tokenizer.
    tokenizer = BertTokenizer(vocab_file=args.vocab_path)
    # Size of the tokenizer's vocabulary.
    vocab_size = len(tokenizer)
    global pad_id
    pad_id = tokenizer.convert_tokens_to_ids(PAD)
    # Create the output directory for the dialogue model.
    if not os.path.exists(args.dialogue_model_output_path):
        os.mkdir(args.dialogue_model_output_path)
    # Create the output directory for the MMI model.
    if not os.path.exists(args.mmi_model_output_path):
        os.mkdir(args.mmi_model_output_path)
    # Load the GPT-2 model; n_ctx is its context window length.
    model, n_ctx = create_model(args, vocab_size)
    model.to(device)
    # Preprocess the raw corpus into token ids.
    if args.raw and args.train_mmi:  # training the MMI model
        preprocess_mmi_raw_data(args, tokenizer, n_ctx)
    elif args.raw and not args.train_mmi:  # training the dialogue model
        preprocess_raw_data(args, tokenizer, n_ctx)
    # Use multiple GPUs in parallel when available.
    multi_gpu = False
    if args.cuda and torch.cuda.device_count() > 1:
        logger.info("Let's use GPUs to train")
        model = DataParallel(
            model, device_ids=[int(i) for i in args.device.split(',')])
        multi_gpu = True
    # Count and report model parameters.
    num_parameters = 0
    parameters = model.parameters()
    for parameter in parameters:
        num_parameters += parameter.numel()
    logger.info('number of model parameters: {}'.format(num_parameters))
    # Load the training data.
    logger.info("loading traing data")
    if args.train_mmi:  # MMI model corpus
        with open(args.train_mmi_tokenized_path, "r", encoding="utf8") as f:
            data = f.read()
    else:  # dialogue model corpus
        with open(args.train_tokenized_path, "r", encoding="utf8") as f:
            data = f.read()
    data_list = data.split("\n")
    train_list, test_list = train_test_split(data_list,
                                             test_size=0.2,
                                             random_state=1)
    # Train, then evaluate on the held-out split.
    train(model, device, train_list, multi_gpu, args)
    evaluate(model, device, test_list, multi_gpu, args)
def main():
    """Evaluate open-set recognition on the test set: compare a plain softmax
    classifier with an OpenMax-style rejection rule and print both accuracies.

    Loads a pretrained backbone + metric head, fits a Weibull tail model for
    OpenMax, then classifies every test image, labelling samples outside the
    training classes as 'unknown'. Side effects: prints, histogram plots, and
    saved images of confidently-misjudged unknowns.
    """
    opt = Config()
    opt.num_classes = len(get_train_labels(opt.train_root, opt.criteria_list))
    opt.metric = 'liner'
    distance_path = opt.distance_path
    mean_path = opt.mean_files_path
    alpha_rank = opt.ALPHA_RAN
    labellist = getlabellist(opt.criteria_list)
    train_labels = get_train_labels(opt.train_root, opt.criteria_list)

    # Fit (or re-fit) the Weibull tail model used by OpenMax.
    weibull_model = weibull_tailfitting(mean_path,
                                        distance_path,
                                        train_labels,
                                        tailsize=opt.WEIBULL_TAIL_SIZE,
                                        distance_type=opt.distance_type)

    # data loader
    test_dataset = Dataset(opt.test_root,
                           opt.test_list,
                           phase='test',
                           input_shape=opt.input_shape)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=opt.test_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    # Load the feature-extractor backbone.
    if opt.backbone == 'resnet18':
        model = resnet_face18(opt.use_se)
    elif opt.backbone == 'resnet34':
        model = resnet34()
    elif opt.backbone == 'resnet50':
        model = resnet50()
    else:
        raise TypeError('backbone: {} is not expected'.format(opt.backbone))
    model = DataParallel(model)
    model.to(device)
    # BUG FIX: load_state_dict() takes a state dict, not a checkpoint path —
    # the original passed opt.test_model_path directly on the CUDA branch.
    if device == 'cuda':
        model.load_state_dict(torch.load(opt.test_model_path))
    else:
        model.load_state_dict(
            torch.load(opt.test_model_path, map_location={'cuda:0': 'cpu'}))
    model.eval()

    # Classification head matching the training-time metric.
    if opt.metric == 'add_margin':
        metric_fc = AddMarginProduct(512, opt.num_classes, s=30, m=0.35)
    elif opt.metric == 'arc_margin':
        metric_fc = ArcMarginProduct(512,
                                     opt.num_classes,
                                     s=30,
                                     m=0.5,
                                     easy_margin=opt.easy_margin)
    elif opt.metric == 'sphere':
        metric_fc = SphereProduct(512, opt.num_classes, m=4)
    else:
        metric_fc = nn.Linear(512, opt.num_classes)
    metric_fc.to(device)
    metric_fc = DataParallel(metric_fc)
    # BUG FIX: same path-vs-state-dict bug as for `model` above.
    if device == 'cuda':
        metric_fc.load_state_dict(torch.load(opt.test_metric_fc_path))
    else:
        metric_fc.load_state_dict(
            torch.load(opt.test_metric_fc_path,
                       map_location={'cuda:0': 'cpu'}))
    metric_fc.eval()

    print(labellist)
    openmax_preds_list = []
    softmax_preds_list = []
    ans_preds_list = []
    softmax_data_list_known = []
    softmax_data_list_unknown = []

    for i, (imgs, label_ids) in enumerate(test_loader):
        # compute feature and estimate score → create img_preds that contains feature, score
        imgs_feature = model(imgs)
        # scores = metric_fc(imgs_feature, label_ids)
        scores = metric_fc(imgs_feature)
        scores = scores.detach().numpy()
        scores = np.array(scores)[:, np.newaxis, :]
        temp_labels = [labellist[pid] for pid in label_ids]
        for ii, (score, label) in enumerate(zip(scores, temp_labels)):
            openmax_predict, softmax_predict = openmax(
                score,
                weibull_model,
                train_labels,
                eu_weight=opt.euc_scale,
                alpharank=alpha_rank,
                distance_type=opt.distance_type)
            softmax_ans = labellist[np.argmax(softmax_predict)]
            # Top activation normalised by the L2 norm of the positive
            # activations (hoisted: the original recomputed this four times).
            normalized_top = (np.sort(score, axis=1)[0][::-1][0] /
                              np.linalg.norm(score[score > 0], ord=2))
            # 'type 4' rejection rule. Earlier experiments, for reference:
            # type 1: argmax over openmax_predict (reject if past known range)
            # type 2: raw top score vs opt.SCORE_THRESHOLD
            # type 3: normalise by the norm of the WHOLE score vector
            # (the original assigned type 3 then immediately overwrote it).
            openmax_ans = (softmax_ans
                           if normalized_top > opt.SCORE_NORMALIZED else
                           'unknown')
            ans_label = label if labellist.index(label) < len(
                train_labels) else 'unknown'
            if ans_label == 'unknown':
                softmax_data_list_unknown.append(normalized_top)
                # Save unknowns the classifier is (wrongly) confident about.
                if normalized_top > 0.7:
                    import matplotlib.pyplot as plt
                    print(label)
                    plt.imshow(np.array(imgs[ii][0]))
                    plt.savefig('estimate_visualize/{}_{}.jpg'.format(
                        i, label))
                    plt.show()
            else:
                softmax_data_list_known.append(normalized_top)
            openmax_preds_list.append(openmax_ans)
            softmax_preds_list.append(softmax_ans)
            ans_preds_list.append(ans_label)
            print(
                'predict_softmax: {}, predict_openmax: {}, answer: {}'.format(
                    softmax_ans, openmax_ans, ans_label))

    show_histgram(softmax_data_list_unknown)
    show_histgram(softmax_data_list_known)

    # accuracy check
    soft_acc = accuracy(softmax_preds_list, ans_preds_list)
    open_acc = accuracy(openmax_preds_list, ans_preds_list)
    print('softmax:', soft_acc / len(ans_preds_list))
    print('openmax:', open_acc / len(ans_preds_list))
def run(config): from datasets import myDataset config, model, loss, warp, trainer, train_data, val_data, train_loader, val_loader = prepare( config) # print(model) # data, gt_prob_fpn, gt_coord_prob_fpn, gt_coord_diff_fpn, gt_diff_fpn, gt_connects_fpn, self.cases[idx] = train_data[0] # print(data.shape) # exit() if config.test: print('Start testing') #if hasattr(model, 'test'): # model.forward = model.test model = DataParallel(model.cuda()) tester = Tester(model, config) val_data = myDataset(config, 'test') test_loader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=3, pin_memory=True, collate_fn=lambda x: x) tester.test(test_loader) return elif config.val: print('Start Val') start_epoch = config.train['start_epoch'] trainer.validate(start_epoch, val_loader, save=True) else: start_epoch = config.train['start_epoch'] epoch = config.train["epoch"] print('Start training from %d-th epoch' % start_epoch) epoch2loss = {} for i in range(start_epoch, epoch + 1): try: # no hardming if 'hardmining' in config.prepare and config.prepare[ 'hardmining']: train_loader.dataset.resample3() json.dump( [str(item) for item in train_loader.dataset.samples], open( os.path.join(trainer.save_dir, 'sample_%d.json' % (i)), 'w'), indent=2) json.dump( { k: str(v) for k, v in train_loader.dataset.sample_weights.items() }, open( os.path.join(trainer.save_dir, 'sample_weights_%d.json' % (i)), 'w'), indent=2) #json.dump({k: str(v) for k, v in train_loader.dataset.neg_sample_weights.items()}, open(os.path.join(trainer.save_dir, 'neg_sample_weights_%d.json'%(i)), 'w'), indent=2) loss_list = trainer.train(i, train_loader) epoch2loss[i] = list(loss_list) trainer.validate(i, val_loader) except KeyboardInterrupt as e: traceback.print_exc() trainer.ioer.save_file(trainer.net, i, trainer.args, 1e10, isbreak=True) sys.exit(0) print(epoch2loss) with open('./epoch_loss.json', 'w') as f: f.write(json.dumps(epoch2loss))
def main():
    """Train a GPT-2 language model on pre-tokenized corpus pieces.

    Parses CLI options, optionally (re)builds the tokenized dataset, then
    trains with optional DataParallel, apex fp16, and gradient accumulation.
    Saves a checkpoint after every epoch and a final model at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', default='0,1,2,3', type=str,
                        required=False, help='设置使用哪些显卡')
    parser.add_argument('--model_config',
                        default='config/model_config_small.json', type=str,
                        required=False, help='选择模型参数')
    parser.add_argument('--tokenizer_path', default='cache/vocab_small.txt',
                        type=str, required=False, help='选择词库')
    parser.add_argument('--raw_data_path', default='data/train.json',
                        type=str, required=False, help='原始训练语料')
    parser.add_argument('--tokenized_data_path', default='data/tokenized/',
                        type=str, required=False, help='tokenized语料存放位置')
    parser.add_argument('--raw', action='store_true', help='是否先做tokenize')
    parser.add_argument('--epochs', default=5, type=int, required=False,
                        help='训练循环')
    parser.add_argument('--batch_size', default=8, type=int, required=False,
                        help='训练batch size')
    parser.add_argument('--lr', default=1.5e-4, type=float, required=False,
                        help='学习率')
    parser.add_argument('--warmup_steps', default=2000, type=int,
                        required=False, help='warm up步数')
    parser.add_argument('--log_step', default=1, type=int, required=False,
                        help='多少步汇报一次loss')
    parser.add_argument('--stride', default=768, type=int, required=False,
                        help='训练时取训练数据的窗口步长')
    # BUG FIX: was type=str, so passing e.g. `--gradient_accumulation 2`
    # crashed later on `gradient_accumulation > 1` and the loss division.
    parser.add_argument('--gradient_accumulation', default=1, type=int,
                        required=False, help='梯度积累')
    parser.add_argument('--fp16', action='store_true', help='混合精度')
    parser.add_argument('--fp16_opt_level', default='O1', type=str,
                        required=False)
    parser.add_argument('--max_grad_norm', default=1.0, type=float,
                        required=False)
    parser.add_argument('--num_pieces', default=100, type=int, required=False,
                        help='将训练语料分成多少份')
    parser.add_argument('--min_length', default=128, type=int, required=False,
                        help='最短收录文章长度')
    parser.add_argument('--output_dir', default='model/', type=str,
                        required=False, help='模型输出路径')
    parser.add_argument('--pretrained_model', default='', type=str,
                        required=False, help='模型训练起点路径')
    parser.add_argument('--writer_dir', default='tensorboard_summary/',
                        type=str, required=False, help='Tensorboard路径')
    parser.add_argument('--no_wordpiece', action='store_true',
                        help='不做word piece切词')
    args = parser.parse_args()
    print('args:\n' + args.__repr__())

    if args.no_wordpiece:
        import tokenization_bert_without_wordpiece as tokenization_bert
    else:
        import tokenization_bert

    # Select which GPUs this process may use.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    model_config = pytorch_transformers.modeling_gpt2.GPT2Config.from_json_file(
        args.model_config)
    print('config:\n' + model_config.to_json_string())
    n_ctx = model_config.n_ctx
    full_tokenizer = tokenization_bert.BertTokenizer(
        vocab_file=args.tokenizer_path)
    full_tokenizer.max_len = n_ctx
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('using device:', device)

    raw_data_path = args.raw_data_path
    tokenized_data_path = args.tokenized_data_path
    raw = args.raw  # whether to (re)build the tokenized dataset from scratch
    epochs = args.epochs
    batch_size = args.batch_size
    lr = args.lr
    warmup_steps = args.warmup_steps
    log_step = args.log_step
    stride = args.stride
    gradient_accumulation = args.gradient_accumulation
    fp16 = args.fp16  # do not enable on cards without fp16 support
    fp16_opt_level = args.fp16_opt_level
    max_grad_norm = args.max_grad_norm
    num_pieces = args.num_pieces
    min_length = args.min_length
    output_dir = args.output_dir
    tb_writer = SummaryWriter(log_dir=args.writer_dir)

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    if raw:
        print('building files')
        build_files(data_path=raw_data_path,
                    tokenized_data_path=tokenized_data_path,
                    num_pieces=num_pieces,
                    full_tokenizer=full_tokenizer,
                    min_length=min_length)
        print('files built')

    if not args.pretrained_model:
        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel(
            config=model_config)
    else:
        model = pytorch_transformers.modeling_gpt2.GPT2LMHeadModel.from_pretrained(
            args.pretrained_model)
    model.train()
    model.to(device)

    # Report parameter count.
    num_parameters = sum(p.numel() for p in model.parameters())
    print('number of parameters: {}'.format(num_parameters))

    multi_gpu = False
    # Count tokens once to derive the scheduler's total step budget.
    full_len = 0
    print('calculating total steps')
    for i in tqdm(range(num_pieces)):
        with open(tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                  'r') as f:
            full_len += len([int(item) for item in f.read().strip().split()])
    total_steps = int(full_len / stride * epochs / batch_size /
                      gradient_accumulation)
    print('total steps = {}'.format(total_steps))

    optimizer = pytorch_transformers.AdamW(model.parameters(), lr=lr,
                                           correct_bias=True)
    scheduler = pytorch_transformers.WarmupLinearSchedule(
        optimizer, warmup_steps=warmup_steps, t_total=total_steps)
    if fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=fp16_opt_level)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = DataParallel(model)
        multi_gpu = True

    print('starting training')
    overall_step = 0
    for epoch in range(epochs):
        print('epoch {}'.format(epoch + 1))
        now = datetime.now()
        print('time: {}'.format(now))
        # Visit the corpus pieces in a fresh random order each epoch.
        x = np.linspace(0, num_pieces - 1, num_pieces, dtype=np.int32)
        random.shuffle(x)
        piece_num = 0
        for i in x:
            running_loss = 0
            with open(
                    tokenized_data_path + 'tokenized_train_{}.txt'.format(i),
                    'r') as f:
                tokens = [int(token) for token in f.read().strip().split()]
            # Slide a window of n_ctx tokens with the configured stride.
            start_point = 0
            samples = []
            while start_point < len(tokens) - n_ctx:
                samples.append(tokens[start_point:start_point + n_ctx])
                start_point += stride
            start_point -= stride
            last = tokens[start_point + n_ctx:]
            # BUG FIX: the original extended with `[ids * k]` — one nested
            # list — instead of k pad-token ids.
            last.extend(
                full_tokenizer.convert_tokens_to_ids(['[PAD]']) *
                (n_ctx - len(last)))
            # NOTE(review): `last` (the padded tail window) is never appended
            # to `samples`, so each piece's tail is dropped — confirm intent.
            random.shuffle(samples)
            for step in range(len(samples) // batch_size):  # drop last
                # prepare data
                batch = samples[step * batch_size:(step + 1) * batch_size]
                batch_ids = [[int(t) for t in ids] for ids in batch]
                # Language modelling: labels are the inputs themselves.
                batch_labels = torch.tensor(batch_ids).long().to(device)
                batch_inputs = torch.tensor(batch_ids).long().to(device)

                # forward pass
                outputs = model.forward(input_ids=batch_inputs,
                                        labels=batch_labels)
                loss, logits = outputs[:2]

                # get loss
                if multi_gpu:
                    loss = loss.mean()  # DataParallel returns per-GPU losses
                if gradient_accumulation > 1:
                    loss = loss / gradient_accumulation

                # loss backward
                if fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), max_grad_norm)
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   max_grad_norm)

                # optimizer step
                if (step + 1) % gradient_accumulation == 0:
                    running_loss += loss.item()
                    # FIX: optimizer.step() must precede scheduler.step()
                    # (PyTorch >= 1.1), otherwise the first LR is skipped.
                    optimizer.step()
                    scheduler.step()
                    optimizer.zero_grad()
                    overall_step += 1
                    if (overall_step + 1) % log_step == 0:
                        tb_writer.add_scalar('loss', loss.item(),
                                             overall_step)
                        print(
                            'now time: {}:{}. Step {} of piece {} of epoch {}, loss {}'
                            .format(
                                datetime.now().hour,
                                datetime.now().minute,
                                (step + 1) // gradient_accumulation,
                                piece_num, epoch + 1,
                                running_loss * gradient_accumulation /
                                log_step))
                        running_loss = 0
            piece_num += 1

        print('saving model for epoch {}'.format(epoch + 1))
        if not os.path.exists(output_dir + 'model_epoch{}'.format(epoch + 1)):
            os.mkdir(output_dir + 'model_epoch{}'.format(epoch + 1))
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(output_dir +
                                      'model_epoch{}'.format(epoch + 1))
        # torch.save(scheduler.state_dict(), output_dir + 'model_epoch{}/scheduler.pt'.format(epoch + 1))
        # torch.save(optimizer.state_dict(), output_dir + 'model_epoch{}/optimizer.pt'.format(epoch + 1))
        print('epoch {} finished'.format(epoch + 1))
        then = datetime.now()
        print('time: {}'.format(then))
        print('time for one epoch: {}'.format(then - now))

    print('training finished')
    if not os.path.exists(output_dir + 'final_model'):
        os.mkdir(output_dir + 'final_model')
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir + 'final_model')
def main(args):
    """Three-phase GLCIC training (completion network + context discriminator).

    Phase 1 trains the completion network (CN) alone on reconstruction loss;
    phase 2 trains the context discriminator (CD) against the frozen CN's
    outputs; phase 3 trains both adversarially. Snapshots (sample grids +
    model weights) are written under args.result_dir/phase_{1,2,3}/.

    Idiomatic fixes vs. the original (behavior unchanged): `is not None` /
    `not ...` comparisons, `os.makedirs(..., exist_ok=True)`, and the local
    `input` renamed to `cn_input` to stop shadowing the builtin.
    """
    # ================================================
    # Preparation
    # ================================================
    args.data_dir = os.path.expanduser(args.data_dir)
    args.result_dir = os.path.expanduser(args.result_dir)
    if args.init_model_cn is not None:
        args.init_model_cn = os.path.expanduser(args.init_model_cn)
    if args.init_model_cd is not None:
        args.init_model_cd = os.path.expanduser(args.init_model_cd)
    if not torch.cuda.is_available():
        raise Exception('At least one gpu must be available.')
    gpu = torch.device('cuda:0')

    # create result directory (if necessary)
    os.makedirs(args.result_dir, exist_ok=True)
    for s in ['phase_1', 'phase_2', 'phase_3']:
        os.makedirs(os.path.join(args.result_dir, s), exist_ok=True)

    # dataset
    trnsfm = transforms.Compose([
        transforms.Resize(args.cn_input_size),
        transforms.RandomCrop((args.cn_input_size, args.cn_input_size)),
        transforms.ToTensor(),
    ])
    print('loading dataset... (it may take a few minutes)')
    train_dset = ImageDataset(os.path.join(args.data_dir, 'train'),
                              trnsfm,
                              recursive_search=args.recursive_search)
    test_dset = ImageDataset(os.path.join(args.data_dir, 'test'),
                             trnsfm,
                             recursive_search=args.recursive_search)
    train_loader = DataLoader(train_dset,
                              batch_size=(args.bsize // args.bdivs),
                              shuffle=True)

    # compute mean pixel value of training dataset
    mpv = np.zeros(shape=(3, ))
    if args.mpv is None:
        pbar = tqdm(
            total=len(train_dset.imgpaths),
            desc='computing mean pixel value for training dataset...')
        for imgpath in train_dset.imgpaths:
            img = Image.open(imgpath)
            x = np.array(img, dtype=np.float32) / 255.
            mpv += x.mean(axis=(0, 1))
            pbar.update()
        mpv /= len(train_dset.imgpaths)
        pbar.close()
    else:
        mpv = np.array(args.mpv)

    # save training config (convert mpv to a json-serializable type)
    mpv_json = [float(mpv[i]) for i in range(3)]
    args_dict = vars(args)
    args_dict['mpv'] = mpv_json
    with open(os.path.join(args.result_dir, 'config.json'), mode='w') as f:
        json.dump(args_dict, f)

    # make mpv & alpha tensor
    mpv = torch.tensor(mpv.astype(np.float32).reshape(1, 3, 1, 1)).to(gpu)
    alpha = torch.tensor(args.alpha).to(gpu)

    # ================================================
    # Training Phase 1
    # ================================================
    model_cn = CompletionNetwork()
    if args.data_parallel:
        model_cn = DataParallel(model_cn)
    if args.init_model_cn is not None:
        model_cn.load_state_dict(
            torch.load(args.init_model_cn, map_location='cpu'))
    if args.optimizer == 'adadelta':
        opt_cn = Adadelta(model_cn.parameters())
    else:
        opt_cn = Adam(model_cn.parameters())
    model_cn = model_cn.to(gpu)

    # training
    cnt_bdivs = 0  # micro-batch counter for gradient accumulation
    pbar = tqdm(total=args.steps_1)
    while pbar.n < args.steps_1:
        for x in train_loader:
            # forward
            x = x.to(gpu)
            mask = gen_input_mask(
                shape=(x.shape[0], 1, x.shape[2], x.shape[3]),
                hole_size=((args.hole_min_w, args.hole_max_w),
                           (args.hole_min_h, args.hole_max_h)),
                hole_area=gen_hole_area(
                    (args.ld_input_size, args.ld_input_size),
                    (x.shape[3], x.shape[2])),
                max_holes=args.max_holes,
            ).to(gpu)
            # Replace the masked region with the mean pixel value.
            x_mask = x - x * mask + mpv * mask
            cn_input = torch.cat((x_mask, mask), dim=1)
            output = model_cn(cn_input)
            loss = completion_network_loss(x, output, mask)

            # backward
            loss.backward()
            cnt_bdivs += 1
            if cnt_bdivs >= args.bdivs:
                cnt_bdivs = 0
                # optimize
                opt_cn.step()
                # clear grads
                opt_cn.zero_grad()
                # update progbar
                pbar.set_description('phase 1 | train loss: %.5f' %
                                     loss.cpu())
                pbar.update()
                # test
                if pbar.n % args.snaperiod_1 == 0:
                    with torch.no_grad():
                        x = sample_random_batch(
                            test_dset,
                            batch_size=args.num_test_completions).to(gpu)
                        mask = gen_input_mask(
                            shape=(x.shape[0], 1, x.shape[2], x.shape[3]),
                            hole_size=((args.hole_min_w, args.hole_max_w),
                                       (args.hole_min_h, args.hole_max_h)),
                            hole_area=gen_hole_area(
                                (args.ld_input_size, args.ld_input_size),
                                (x.shape[3], x.shape[2])),
                            max_holes=args.max_holes,
                        ).to(gpu)
                        x_mask = x - x * mask + mpv * mask
                        cn_input = torch.cat((x_mask, mask), dim=1)
                        output = model_cn(cn_input)
                        completed = poisson_blend(x, output, mask)
                        imgs = torch.cat(
                            (x.cpu(), x_mask.cpu(), completed.cpu()), dim=0)
                        imgpath = os.path.join(args.result_dir, 'phase_1',
                                               'step%d.png' % pbar.n)
                        model_cn_path = os.path.join(
                            args.result_dir, 'phase_1',
                            'model_cn_step%d' % pbar.n)
                        save_image(imgs, imgpath, nrow=len(x))
                        if args.data_parallel:
                            torch.save(model_cn.module.state_dict(),
                                       model_cn_path)
                        else:
                            torch.save(model_cn.state_dict(), model_cn_path)
                # terminate
                if pbar.n >= args.steps_1:
                    break
    pbar.close()

    # ================================================
    # Training Phase 2
    # ================================================
    model_cd = ContextDiscriminator(
        local_input_shape=(3, args.ld_input_size, args.ld_input_size),
        global_input_shape=(3, args.cn_input_size, args.cn_input_size),
        arc=args.arc,
    )
    if args.data_parallel:
        model_cd = DataParallel(model_cd)
    if args.init_model_cd is not None:
        model_cd.load_state_dict(
            torch.load(args.init_model_cd, map_location='cpu'))
    if args.optimizer == 'adadelta':
        opt_cd = Adadelta(model_cd.parameters())
    else:
        opt_cd = Adam(model_cd.parameters())
    model_cd = model_cd.to(gpu)
    bceloss = BCELoss()

    # training
    cnt_bdivs = 0
    pbar = tqdm(total=args.steps_2)
    while pbar.n < args.steps_2:
        for x in train_loader:
            # fake forward: CN completions should be classified as fake (0).
            x = x.to(gpu)
            hole_area_fake = gen_hole_area(
                (args.ld_input_size, args.ld_input_size),
                (x.shape[3], x.shape[2]))
            mask = gen_input_mask(
                shape=(x.shape[0], 1, x.shape[2], x.shape[3]),
                hole_size=((args.hole_min_w, args.hole_max_w),
                           (args.hole_min_h, args.hole_max_h)),
                hole_area=hole_area_fake,
                max_holes=args.max_holes,
            ).to(gpu)
            fake = torch.zeros((len(x), 1)).to(gpu)
            x_mask = x - x * mask + mpv * mask
            input_cn = torch.cat((x_mask, mask), dim=1)
            output_cn = model_cn(input_cn)
            input_gd_fake = output_cn.detach()  # CN is frozen in phase 2
            input_ld_fake = crop(input_gd_fake, hole_area_fake)
            output_fake = model_cd(
                (input_ld_fake.to(gpu), input_gd_fake.to(gpu)))
            loss_fake = bceloss(output_fake, fake)

            # real forward: untouched images should be classified as real (1).
            hole_area_real = gen_hole_area(size=(args.ld_input_size,
                                                 args.ld_input_size),
                                           mask_size=(x.shape[3], x.shape[2]))
            real = torch.ones((len(x), 1)).to(gpu)
            input_gd_real = x
            input_ld_real = crop(input_gd_real, hole_area_real)
            output_real = model_cd((input_ld_real, input_gd_real))
            loss_real = bceloss(output_real, real)

            # reduce
            loss = (loss_fake + loss_real) / 2.

            # backward
            loss.backward()
            cnt_bdivs += 1
            if cnt_bdivs >= args.bdivs:
                cnt_bdivs = 0
                # optimize
                opt_cd.step()
                # clear grads
                opt_cd.zero_grad()
                # update progbar
                pbar.set_description('phase 2 | train loss: %.5f' %
                                     loss.cpu())
                pbar.update()
                # test
                if pbar.n % args.snaperiod_2 == 0:
                    with torch.no_grad():
                        x = sample_random_batch(
                            test_dset,
                            batch_size=args.num_test_completions).to(gpu)
                        mask = gen_input_mask(
                            shape=(x.shape[0], 1, x.shape[2], x.shape[3]),
                            hole_size=((args.hole_min_w, args.hole_max_w),
                                       (args.hole_min_h, args.hole_max_h)),
                            hole_area=gen_hole_area(
                                (args.ld_input_size, args.ld_input_size),
                                (x.shape[3], x.shape[2])),
                            max_holes=args.max_holes,
                        ).to(gpu)
                        x_mask = x - x * mask + mpv * mask
                        cn_input = torch.cat((x_mask, mask), dim=1)
                        output = model_cn(cn_input)
                        completed = poisson_blend(x, output, mask)
                        imgs = torch.cat(
                            (x.cpu(), x_mask.cpu(), completed.cpu()), dim=0)
                        imgpath = os.path.join(args.result_dir, 'phase_2',
                                               'step%d.png' % pbar.n)
                        model_cd_path = os.path.join(
                            args.result_dir, 'phase_2',
                            'model_cd_step%d' % pbar.n)
                        save_image(imgs, imgpath, nrow=len(x))
                        if args.data_parallel:
                            torch.save(model_cd.module.state_dict(),
                                       model_cd_path)
                        else:
                            torch.save(model_cd.state_dict(), model_cd_path)
                # terminate
                if pbar.n >= args.steps_2:
                    break
    pbar.close()

    # ================================================
    # Training Phase 3
    # ================================================
    # training
    cnt_bdivs = 0
    pbar = tqdm(total=args.steps_3)
    while pbar.n < args.steps_3:
        for x in train_loader:
            # forward model_cd
            x = x.to(gpu)
            hole_area_fake = gen_hole_area(
                (args.ld_input_size, args.ld_input_size),
                (x.shape[3], x.shape[2]))
            mask = gen_input_mask(
                shape=(x.shape[0], 1, x.shape[2], x.shape[3]),
                hole_size=((args.hole_min_w, args.hole_max_w),
                           (args.hole_min_h, args.hole_max_h)),
                hole_area=hole_area_fake,
                max_holes=args.max_holes,
            ).to(gpu)

            # fake forward
            fake = torch.zeros((len(x), 1)).to(gpu)
            x_mask = x - x * mask + mpv * mask
            input_cn = torch.cat((x_mask, mask), dim=1)
            output_cn = model_cn(input_cn)
            input_gd_fake = output_cn.detach()
            input_ld_fake = crop(input_gd_fake, hole_area_fake)
            output_fake = model_cd((input_ld_fake, input_gd_fake))
            loss_cd_fake = bceloss(output_fake, fake)

            # real forward
            hole_area_real = gen_hole_area(size=(args.ld_input_size,
                                                 args.ld_input_size),
                                           mask_size=(x.shape[3], x.shape[2]))
            real = torch.ones((len(x), 1)).to(gpu)
            input_gd_real = x
            input_ld_real = crop(input_gd_real, hole_area_real)
            output_real = model_cd((input_ld_real, input_gd_real))
            loss_cd_real = bceloss(output_real, real)

            # reduce
            loss_cd = (loss_cd_fake + loss_cd_real) * alpha / 2.

            # backward model_cd
            loss_cd.backward()
            cnt_bdivs += 1
            # Note: the counter is intentionally NOT reset here; the CN branch
            # below runs every micro-batch and the reset happens there.
            if cnt_bdivs >= args.bdivs:
                # optimize
                opt_cd.step()
                # clear grads
                opt_cd.zero_grad()

            # forward model_cn (adversarial + reconstruction loss, no detach)
            loss_cn_1 = completion_network_loss(x, output_cn, mask)
            input_gd_fake = output_cn
            input_ld_fake = crop(input_gd_fake, hole_area_fake)
            output_fake = model_cd((input_ld_fake, (input_gd_fake)))
            loss_cn_2 = bceloss(output_fake, real)

            # reduce
            loss_cn = (loss_cn_1 + alpha * loss_cn_2) / 2.

            # backward model_cn
            loss_cn.backward()
            if cnt_bdivs >= args.bdivs:
                cnt_bdivs = 0
                # optimize
                opt_cn.step()
                # clear grads
                opt_cn.zero_grad()
                # update progbar
                pbar.set_description(
                    'phase 3 | train loss (cd): %.5f (cn): %.5f' %
                    (loss_cd.cpu(), loss_cn.cpu()))
                pbar.update()
                # test
                if pbar.n % args.snaperiod_3 == 0:
                    with torch.no_grad():
                        x = sample_random_batch(
                            test_dset,
                            batch_size=args.num_test_completions).to(gpu)
                        mask = gen_input_mask(
                            shape=(x.shape[0], 1, x.shape[2], x.shape[3]),
                            hole_size=((args.hole_min_w, args.hole_max_w),
                                       (args.hole_min_h, args.hole_max_h)),
                            hole_area=gen_hole_area(
                                (args.ld_input_size, args.ld_input_size),
                                (x.shape[3], x.shape[2])),
                            max_holes=args.max_holes,
                        ).to(gpu)
                        x_mask = x - x * mask + mpv * mask
                        cn_input = torch.cat((x_mask, mask), dim=1)
                        output = model_cn(cn_input)
                        completed = poisson_blend(x, output, mask)
                        imgs = torch.cat(
                            (x.cpu(), x_mask.cpu(), completed.cpu()), dim=0)
                        imgpath = os.path.join(args.result_dir, 'phase_3',
                                               'step%d.png' % pbar.n)
                        model_cn_path = os.path.join(
                            args.result_dir, 'phase_3',
                            'model_cn_step%d' % pbar.n)
                        model_cd_path = os.path.join(
                            args.result_dir, 'phase_3',
                            'model_cd_step%d' % pbar.n)
                        save_image(imgs, imgpath, nrow=len(x))
                        if args.data_parallel:
                            torch.save(model_cn.module.state_dict(),
                                       model_cn_path)
                            torch.save(model_cd.module.state_dict(),
                                       model_cd_path)
                        else:
                            torch.save(model_cn.state_dict(), model_cn_path)
                            torch.save(model_cd.state_dict(), model_cd_path)
                # terminate
                if pbar.n >= args.steps_3:
                    break
    pbar.close()
def train(args):
    """Train the model produced by create_model(args).

    Validates every args.iter_val training iterations, saving a '_latest'
    checkpoint each time and overwriting model_file whenever top-1
    accuracy improves (or unconditionally with args.always_save).
    """
    print('start training...')
    model, model_file = create_model(args)
    #model = model.cuda()
    # Multi-GPU: wrap in DataParallel, re-attaching the custom .name
    # attribute that the wrapper would otherwise hide.
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name
    model = model.cuda()

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=0.0001)

    # 'plateau' steps on the validation metric (mode='max'); otherwise
    # cosine annealing, stepped once per validation round below.
    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer,
                                         mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         args.t_max,
                                         eta_min=args.min_lr)
        #ExponentialLR(optimizer, 0.9, last_epoch=-1)
        #CosineAnnealingLR(optimizer, 15, 1e-7)

    # Only the validation loader is needed up front; the train loader is
    # rebuilt at the start of every epoch.
    if args.balanced:
        _, val_loader = get_balanced_train_val_loaders(
            num_classes=args.num_classes,
            start_index=args.start_index,
            batch_size=args.batch_size,
            val_batch_size=args.val_batch_size,
            val_num=args.val_num,
            other=args.other)
    else:
        _, val_loader = get_train_val_loaders(
            num_classes=args.num_classes,
            start_index=args.start_index,
            batch_size=args.batch_size,
            val_batch_size=args.val_batch_size,
            val_num=args.val_num,
            other=args.other)

    best_top1_acc = 0.

    print(
        'epoch | lr | % | loss | avg | loss | top1 | top10 | best | time | save |'
    )

    # Optional validation pass before any training.
    if not args.no_first_val:
        top10_acc, best_top1_acc, total_loss = validate(
            args, model, val_loader)
        print(
            'val | | | | | {:.4f} | {:.4f} | {:.4f} | {:.4f} | | |'
            .format(total_loss, best_top1_acc, top10_acc, best_top1_acc))
    if args.val:
        # validation-only mode: stop after the first pass
        return

    model.train()

    # NOTE(review): the scheduler is stepped once before any training
    # iteration — confirm this initial step is intentional.
    if args.lrs == 'plateau':
        lr_scheduler.step(best_top1_acc)
    else:
        lr_scheduler.step()

    train_iter = 0

    for epoch in range(args.start_epoch, args.epochs):
        # Rebuild loaders each epoch (the balanced variant resamples).
        if args.balanced:
            train_loader, val_loader = get_balanced_train_val_loaders(
                num_classes=args.num_classes,
                start_index=args.start_index,
                batch_size=args.batch_size,
                dev_mode=args.dev_mode,
                val_batch_size=args.val_batch_size,
                val_num=args.val_num,
                other=args.other)
        else:
            train_loader, val_loader = get_train_val_loaders(
                num_classes=args.num_classes,
                start_index=args.start_index,
                batch_size=args.batch_size,
                dev_mode=args.dev_mode,
                val_batch_size=args.val_batch_size,
                val_num=args.val_num,
                other=args.other)

        train_loss = 0
        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target = data
            img, target = img.cuda(), target.cuda()
            # The model computes its own loss; normalize by batch size.
            loss = model(img, target).sum() / img.size(0)
            #loss = criterion(args, output, target)
            #(img.size(0) * loss).backward()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size * (batch_idx + 1),
                train_loader.num, loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            # Periodic in-epoch validation + checkpointing.
            if train_iter > 0 and train_iter % args.iter_val == 0:
                # always refresh the '_latest' checkpoint first
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(),
                               model_file + '_latest')
                else:
                    torch.save(model.state_dict(), model_file + '_latest')

                top10_acc, top1_acc, total_loss = validate(
                    args, model, val_loader)

                _save_ckp = ''
                if args.always_save or top1_acc > best_top1_acc:
                    best_top1_acc = top1_acc
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} |'.
                      format(total_loss, top1_acc, top10_acc, best_top1_acc,
                             (time.time() - bg) / 60, _save_ckp))

                # validate() may have put the model in eval mode
                model.train()

                if args.lrs == 'plateau':
                    lr_scheduler.step(top1_acc)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
def main(args, logger):
    """Run 5-fold GroupKFold training of a BERT multi-label classifier.

    Folds are grouped by question_body_le so duplicate questions never
    span train/validation. Supports resuming from args.checkpoint, and
    logs/persists per-fold metrics and checkpoints under
    {MNT_DIR}/checkpoints/{EXP_ID}/{fold}.
    """
    # trn_df = pd.read_csv(f'{MNT_DIR}/inputs/origin/train.csv')
    trn_df = pd.read_pickle(f'{MNT_DIR}/inputs/nes_info/trn_df.pkl')
    trn_df['is_original'] = 1
    # aug_df = pd.read_pickle(f'{MNT_DIR}/inputs/nes_info/ContextualWordEmbsAug_sub_df.pkl')
    # aug_df['is_original'] = 0
    # trn_df = pd.concat([trn_df, aug_df], axis=0).reset_index(drop=True)

    gkf = GroupKFold(n_splits=5).split(
        X=trn_df.question_body,
        groups=trn_df.question_body_le,
    )

    histories = {
        'trn_loss': {},
        'val_loss': {},
        'val_metric': {},
        'val_metric_raws': {},
    }
    loaded_fold = -1
    loaded_epoch = -1
    if args.checkpoint:
        histories, loaded_fold, loaded_epoch = load_checkpoint(args.checkpoint)

    # calc max_seq_len using quest dataset
    # max_seq_len = QUESTDataset(
    #     df=trn_df,
    #     mode='train',
    #     tokens=[],
    #     augment=[],
    #     pretrained_model_name_or_path=TOKENIZER_PRETRAIN,
    # ).MAX_SEQUENCE_LENGTH
    # max_seq_len = 9458
    # max_seq_len = 1504
    max_seq_len = 512

    fold_best_metrics = []
    fold_best_metrics_raws = []
    for fold, (trn_idx, val_idx) in enumerate(gkf):
        # Folds finished in a previous run: recover their best metrics
        # from the loaded histories and skip retraining.
        if fold < loaded_fold:
            fold_best_metrics.append(np.max(histories["val_metric"][fold]))
            fold_best_metrics_raws.append(
                histories["val_metric_raws"][fold][np.argmax(
                    histories["val_metric"][fold])])
            continue
        sel_log(
            f' --------------------------- start fold {fold} --------------------------- ',
            logger)
        fold_trn_df = trn_df.iloc[trn_idx]  # .query('is_original == 1')
        fold_trn_df = fold_trn_df.drop(['is_original', 'question_body_le'],
                                       axis=1)
        # use only original row
        fold_val_df = trn_df.iloc[val_idx].query('is_original == 1')
        fold_val_df = fold_val_df.drop(['is_original', 'question_body_le'],
                                       axis=1)
        if args.debug:
            # small fixed-seed samples for quick debug runs
            fold_trn_df = fold_trn_df.sample(100, random_state=71)
            fold_val_df = fold_val_df.sample(100, random_state=71)
        # Whitespace-token frequency over title/body/answer of the fold's
        # training rows; tokens seen >= 10 times become extra vocab.
        temp = pd.Series(
            list(
                itertools.chain.from_iterable(
                    fold_trn_df.question_title.apply(lambda x: x.split(' ')) +
                    fold_trn_df.question_body.apply(lambda x: x.split(' ')) +
                    fold_trn_df.answer.apply(lambda x: x.split(' '))))
        ).value_counts()
        tokens = temp[temp >= 10].index.tolist()
        # tokens = []
        # NOTE(review): the frequency-based tokens above are immediately
        # overwritten by these fixed category tokens — confirm intended.
        tokens = [
            'CAT_TECHNOLOGY'.casefold(),
            'CAT_STACKOVERFLOW'.casefold(),
            'CAT_CULTURE'.casefold(),
            'CAT_SCIENCE'.casefold(),
            'CAT_LIFE_ARTS'.casefold(),
        ]

        trn_dataset = QUESTDataset(
            df=fold_trn_df,
            mode='train',
            tokens=tokens,
            augment=[],
            pretrained_model_name_or_path=TOKENIZER_PRETRAIN,
            MAX_SEQUENCE_LENGTH=max_seq_len,
        )
        # update token
        trn_sampler = RandomSampler(data_source=trn_dataset)
        trn_loader = DataLoader(trn_dataset,
                                batch_size=BATCH_SIZE,
                                sampler=trn_sampler,
                                num_workers=os.cpu_count(),
                                worker_init_fn=lambda x: np.random.seed(),
                                drop_last=True,
                                pin_memory=True)
        val_dataset = QUESTDataset(
            df=fold_val_df,
            mode='valid',
            tokens=tokens,
            augment=[],
            pretrained_model_name_or_path=TOKENIZER_PRETRAIN,
            MAX_SEQUENCE_LENGTH=max_seq_len,
        )
        val_sampler = RandomSampler(data_source=val_dataset)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                sampler=val_sampler,
                                num_workers=os.cpu_count(),
                                worker_init_fn=lambda x: np.random.seed(),
                                drop_last=False,
                                pin_memory=True)

        fobj = BCEWithLogitsLoss()
        # fobj = MSELoss()
        model = BertModelForBinaryMultiLabelClassifier(
            num_labels=len(LABEL_COL),
            pretrained_model_name_or_path=MODEL_PRETRAIN,
            # cat_num=5,
            token_size=len(trn_dataset.tokenizer),
            MAX_SEQUENCE_LENGTH=max_seq_len,
        )
        optimizer = optim.Adam(model.parameters(), lr=3e-5)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=MAX_EPOCH,
                                                         eta_min=1e-5)

        # load checkpoint model, optim, scheduler
        if args.checkpoint and fold == loaded_fold:
            load_checkpoint(args.checkpoint, model, optimizer, scheduler)

        for epoch in tqdm(list(range(MAX_EPOCH))):
            if fold <= loaded_fold and epoch <= loaded_epoch:
                # already trained in the resumed run
                continue
            # Warm-up: keep BERT frozen for the first epoch only.
            if epoch < 1:
                model.freeze_unfreeze_bert(freeze=True, logger=logger)
            else:
                model.freeze_unfreeze_bert(freeze=False, logger=logger)
            # wrapped here, unwrapped again after the epoch (model.module)
            model = DataParallel(model)
            model = model.to(DEVICE)
            trn_loss = train_one_epoch(model, fobj, optimizer, trn_loader)
            val_loss, val_metric, val_metric_raws, \
                val_y_preds, val_y_trues, val_qa_ids = test(
                    model, fobj, val_loader)
            scheduler.step()
            # Append this epoch's stats to the fold's history lists.
            if fold in histories['trn_loss']:
                histories['trn_loss'][fold].append(trn_loss)
            else:
                histories['trn_loss'][fold] = [
                    trn_loss,
                ]
            if fold in histories['val_loss']:
                histories['val_loss'][fold].append(val_loss)
            else:
                histories['val_loss'][fold] = [
                    val_loss,
                ]
            if fold in histories['val_metric']:
                histories['val_metric'][fold].append(val_metric)
            else:
                histories['val_metric'][fold] = [
                    val_metric,
                ]
            if fold in histories['val_metric_raws']:
                histories['val_metric_raws'][fold].append(val_metric_raws)
            else:
                histories['val_metric_raws'][fold] = [
                    val_metric_raws,
                ]
            logging_val_metric_raws = ''
            for val_metric_raw in val_metric_raws:
                logging_val_metric_raws += f'{float(val_metric_raw):.4f}, '
            sel_log(
                f'fold : {fold} -- epoch : {epoch} -- '
                f'trn_loss : {float(trn_loss.detach().to("cpu").numpy()):.4f} -- '
                f'val_loss : {float(val_loss.detach().to("cpu").numpy()):.4f} -- '
                f'val_metric : {float(val_metric):.4f} -- '
                f'val_metric_raws : {logging_val_metric_raws}', logger)
            # Move to CPU and unwrap DataParallel before checkpointing.
            model = model.to('cpu')
            model = model.module
            save_checkpoint(f'{MNT_DIR}/checkpoints/{EXP_ID}/{fold}', model,
                            optimizer, scheduler, histories, val_y_preds,
                            val_y_trues, val_qa_ids, fold, epoch, val_loss,
                            val_metric)
        fold_best_metrics.append(np.max(histories["val_metric"][fold]))
        fold_best_metrics_raws.append(
            histories["val_metric_raws"][fold][np.argmax(
                histories["val_metric"][fold])])
        save_and_clean_for_prediction(
            f'{MNT_DIR}/checkpoints/{EXP_ID}/{fold}',
            trn_dataset.tokenizer,
            clean=False)
        del model
    # calc training stats
    fold_best_metric_mean = np.mean(fold_best_metrics)
    fold_best_metric_std = np.std(fold_best_metrics)
    fold_stats = f'{EXP_ID} : {fold_best_metric_mean:.4f} +- {fold_best_metric_std:.4f}'
    sel_log(fold_stats, logger)
    send_line_notification(fold_stats)

    fold_best_metrics_raws_mean = np.mean(fold_best_metrics_raws, axis=0)
    fold_raw_stats = ''
    for metric_stats_raw in fold_best_metrics_raws_mean:
        fold_raw_stats += f'{float(metric_stats_raw):.4f},'
    sel_log(fold_raw_stats, logger)
    send_line_notification(fold_raw_stats)

    sel_log('now saving best checkpoints...', logger)
def main():
    """Cluster face images using features from a fine-tuned CNN.

    Loads a vgg16 / resnet50 / resnet101 checkpoint, strips the classifier
    down to the requested feature layer, extracts features for every image
    under /faces_83/evaluation/<dataset>/, embeds them in 2-D with UMAP and
    scatter-plots them colored per class.

    Bug fixed: the UMAP reducer was created as ``map_model`` but used as
    ``umap_model``, which raised NameError at runtime.
    """
    parser = argparse.ArgumentParser(
        description="""Classtering images using fine-tuned CNN model""")
    parser.add_argument('--dataset', type=str, help='target dataset')
    parser.add_argument('--model', type=str, help='path to model')
    parser.add_argument('--cnn', type=str, help='model archtecture')
    parser.add_argument('--layer', type=str, help='layer type')
    parser.add_argument('--layer_num', default=2, type=int,
                        help='layer number')
    parser.add_argument('--workers', default=0, type=int,
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--batch_size', default=64, type=int,
                        help='mini-batch size (default: 64)')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    args = parser.parse_args()

    # fix random seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # load model (the 1222-way head matches the fine-tuning setup)
    if args.cnn == 'vgg16':
        model = models.vgg16(pretrained=None)
        model.classifier = nn.Sequential(
            *list(model.classifier.children())[:-1])
        model.top_layer = nn.Linear(4096, 1222)
        param = torch.load(args.model)
        model = DataParallel(model)  # for vgg_old
        model.load_state_dict(param)
        model = model.module  # for vgg_old
        if args.layer == 'fc':
            # keep classifier up to fc6 (layer_num 1) or fc7 (layer_num 2)
            if args.layer_num == 1:
                new_classifier = nn.Sequential(
                    *list(model.classifier.children())[:-5])
            if args.layer_num == 2:
                new_classifier = nn.Sequential(
                    *list(model.classifier.children())[:-2])
            model.classifier = new_classifier
        # NOTE(review): top_layer is replaced regardless of args.layer —
        # confirm this is also wanted when args.layer != 'fc'.
        model.top_layer = Identity()
    if args.cnn == 'resnet50':
        model = models.resnet50(pretrained=None)
        num_features = model.fc.in_features
        model.fc = nn.Linear(num_features, 1222)
        param = torch.load(args.model)
        model.load_state_dict(param)
        model.fc = Identity()  # expose pooled features instead of logits
    if args.cnn == 'resnet101':
        model = models.resnet101(pretrained=None)
        num_features = model.fc.in_features
        model.fc = nn.Linear(num_features, 1222)
        param = torch.load(args.model)
        model.load_state_dict(param)
        model.fc = Identity()
    #model = DataParallel(model)
    model.to('cuda')
    cudnn.benchmark = True
    model.eval()

    # logistic regression
    reglog = RegLog(args.layer_num, 10000).to('cuda')

    # collect image paths and integer labels, one class per sub-directory
    filename = args.dataset
    filepath = '/faces_83/evaluation/' + filename + '/'
    #print(filepath)
    class_list = glob(filepath + '*')
    class_list = [os.path.basename(r) for r in class_list]

    len_images = []  # per-class image counts, in class_list order
    images = []
    labels = []
    for n, class_name in enumerate(class_list):
        class_images = glob(filepath + class_name + '/*.jpg')
        images.extend(class_images)
        len_images.append(float(len(class_images)))
        label = [n] * len(class_images)
        labels.extend(label)

    features_stacked = extract_features(images, model, args.batch_size,
                                        args.workers, reglog, args.layer)

    # 2-D embedding of the extracted features.
    # FIX: was `map_model = umap.UMAP(...)` followed by
    # `umap_model.fit_transform(...)` -> NameError; names now agree.
    umap_model = umap.UMAP(n_components=2, metric='cosine')
    reduction_result = umap_model.fit_transform(features_stacked)

    # scatter-plot the embedding, one color per class
    c = 0
    for i in range(len(len_images)):
        cls = int(len_images[i])
        # NOTE(review): the slice end `c + cls - 1` drops the last sample
        # of each class — confirm whether this off-by-one is intentional.
        plt.plot(reduction_result[c:c + cls - 1, 0],
                 reduction_result[c:c + cls - 1, 1],
                 ".",
                 color=colormap[i])
        c += cls
    plt.show()
train_loader = VCRLoader.from_dataset(train, **loader_params) val_loader = VCRLoader.from_dataset(val, **loader_params) test_loader = VCRLoader.from_dataset(test, **loader_params) ARGS_RESET_EVERY = 100 print("Loading {} for {}".format(params['model'].get('type', 'WTF?'), 'rationales' if args.rationale else 'answer'), flush=True) model = Model.from_params(vocab=train.vocab, params=params['model']) for submodule in model.detector.backbone.modules(): if isinstance(submodule, BatchNorm2d): submodule.track_running_stats = False for p in submodule.parameters(): p.requires_grad = False model = DataParallel(model).cuda() if NUM_GPUS > 1 else model.cuda() optimizer = Optimizer.from_params( [x for x in model.named_parameters() if x[1].requires_grad], params['trainer']['optimizer']) lr_scheduler_params = params['trainer'].pop("learning_rate_scheduler", None) scheduler = LearningRateScheduler.from_params( optimizer, lr_scheduler_params) if lr_scheduler_params else None if os.path.exists(args.folder): print("Found folder! restoring", flush=True) start_epoch, val_metric_per_epoch = restore_checkpoint( model, optimizer, serialization_dir=args.folder, learning_rate_scheduler=scheduler)
def train_model(train_dataset, train_num_each, val_dataset, val_num_each):
    """Train res34_tcn on fixed-length frame sequences; keep best weights.

    train_num_each / val_num_each give per-video frame counts from which
    valid sequence start indices are derived. Per-epoch stats accumulate
    in record_np (saved every epoch); the weights with the best validation
    accuracy are written out at the end.
    """
    num_train = len(train_dataset)
    num_val = len(val_dataset)

    train_useful_start_idx = get_useful_start_idx(sequence_length,
                                                 train_num_each)
    #print('train_useful_start_idx ',train_useful_start_idx )
    val_useful_start_idx = get_useful_start_idx(sequence_length, val_num_each)
    #print('test_useful_start_idx ', val_useful_start_idx)

    # Trim so the number of sequences divides evenly across the GPUs.
    num_train_we_use = len(train_useful_start_idx) // num_gpu * num_gpu
    # print('num_train_we_use',num_train_we_use) #92166
    num_val_we_use = len(val_useful_start_idx) // num_gpu * num_gpu
    # print('num_val_we_use', num_val_we_use)
    # num_train_we_use = 8000
    # num_val_we_use = 800

    train_we_use_start_idx = train_useful_start_idx[
        0:num_train_we_use]  # start positions of the training sequences
    val_we_use_start_idx = val_useful_start_idx[0:num_val_we_use]

    np.random.seed(0)
    np.random.shuffle(train_we_use_start_idx)  # shuffle sequence order
    train_idx = []
    for i in range(num_train_we_use):  # number of training sequences
        for j in range(sequence_length):
            # expand each start into frame positions stepped by srate;
            # each image is one sample
            train_idx.append(train_we_use_start_idx[i] + j * srate)
    # print('train_idx',train_idx)

    val_idx = []
    for i in range(num_val_we_use):
        for j in range(sequence_length):
            val_idx.append(val_we_use_start_idx[i] + j * srate)
    # print('val_idx',val_idx)

    num_train_all = float(len(train_idx))
    num_val_all = float(len(val_idx))
    print('num of train dataset: {:6d}'.format(num_train))
    print('num train start idx : {:6d}'.format(len(train_useful_start_idx)))
    print('last idx train start: {:6d}'.format(train_useful_start_idx[-1]))
    print('num of train we use : {:6d}'.format(num_train_we_use))
    print('num of all train use: {:6d}'.format(int(num_train_all)))
    print('num of valid dataset: {:6d}'.format(num_val))
    print('num valid start idx : {:6d}'.format(len(val_useful_start_idx)))
    print('last idx valid start: {:6d}'.format(val_useful_start_idx[-1]))
    print('num of valid we use : {:6d}'.format(num_val_we_use))
    print('num of all valid use: {:6d}'.format(int(num_val_all)))

    val_loader = DataLoader(
        val_dataset,
        batch_size=val_batch_size,
        # sampler=val_idx,
        sampler=SeqSampler(val_dataset, val_idx),
        num_workers=workers,
        pin_memory=False)
    model = res34_tcn()
    if use_gpu:
        model = model.cuda()
        model = DataParallel(model)
    criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # model.parameters() vs model.state_dict(): the former feeds the
    # optimizer, the latter is used to save/restore model weights.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_accuracy = 0.0
    correspond_train_acc = 0.0
    record_np = np.zeros([epochs, 5])  # per-epoch stats (filled below)

    for epoch in range(epochs):
        # Reshuffle sequences with an epoch-dependent seed and rebuild
        # the training index list / loader.
        np.random.seed(epoch)
        np.random.shuffle(train_we_use_start_idx)  # shuffle sequence order
        train_idx = []
        for i in range(num_train_we_use):
            for j in range(sequence_length):
                train_idx.append(train_we_use_start_idx[i] + j * srate)

        train_loader = DataLoader(train_dataset,
                                  batch_size=train_batch_size,
                                  sampler=SeqSampler(train_dataset,
                                                     train_idx),
                                  num_workers=workers,
                                  pin_memory=False)

        model.train()
        train_loss = 0.0
        train_corrects1 = 0
        train_corrects = 0
        train_start_time = time.time()
        num = 0
        train_num = 0
        for data in train_loader:
            num = num + 1
            # inputs, labels_phase = data
            inputs, labels_phase, kdata = data
            if use_gpu:
                # Variable wraps a tensor for autograd (legacy PyTorch API)
                inputs = Variable(inputs.cuda())
                labels = Variable(labels_phase.cuda())
                kdatas = Variable(kdata.cuda())
            else:
                inputs = Variable(inputs)
                labels = Variable(labels_phase)
                kdatas = Variable(kdata)

            optimizer.zero_grad()  # reset accumulated gradients

            # outputs = model.forward(inputs)
            outputs = model.forward(inputs, kdatas)
            #outputs = F.softmax(outputs, dim=-1)
            # torch.max over the last dim returns (values, argmax indices)
            _, preds = torch.max(outputs.data, -1)
            #_, yp = torch.max(y.data, 1)
            #print(yp)
            # print(yp.shape)
            print(num)
            print(preds[-1])
            print(labels)
            # weighted sum of the losses of the two output heads
            loss1 = criterion(outputs[0], labels)
            loss2 = criterion(outputs[1], labels)
            loss = 0.2 * loss1 + 0.8 * loss2
            loss.backward()
            optimizer.step()
            train_loss += loss.data
            train_corrects1 += torch.sum(preds[-2] == labels.data)
            train_corrects += torch.sum(preds[-1] == labels.data)
            train_num += labels.shape[0]
            print(train_corrects.cpu().numpy() / train_num)
            if train_corrects.cpu().numpy() / train_num > 0.75:
                # snapshot once running accuracy passes 0.75;
                # state_dict() stores parameters only (fast, compact)
                torch.save(copy.deepcopy(model.state_dict()), 'test.pth')

        train_elapsed_time = time.time() - train_start_time
        train_accuracy1 = train_corrects1.cpu().numpy() / train_num
        train_accuracy = train_corrects.cpu().numpy() / train_num
        train_average_loss = train_loss / train_num

        # begin eval
        model.eval()
        val_loss = 0.0
        val_corrects1 = 0
        val_corrects = 0
        val_num = 0
        val_start_time = time.time()
        for data in val_loader:
            inputs, labels_phase, kdata = data
            #inputs, labels_phase = data
            #labels_phase = labels_phase[(sequence_length - 1)::sequence_length]
            #kdata = kdata[(sequence_length - 1)::sequence_length]
            if use_gpu:
                inputs = Variable(inputs.cuda())
                labels = Variable(labels_phase.cuda())
                kdatas = Variable(kdata.cuda())
            else:
                inputs = Variable(inputs)
                labels = Variable(labels_phase)
                kdatas = Variable(kdata)

            # crop_type 5/10 = multi-crop eval: fold the crops into the
            # batch dimension, then average the per-crop outputs
            if crop_type == 0 or crop_type == 1:
                #outputs = model.forward(inputs)
                outputs = model.forward(inputs, kdatas)
            elif crop_type == 5:
                inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
                inputs = inputs.view(-1, 3, 224, 224)
                outputs = model.forward(inputs, kdatas)
                # outputs = model.forward(inputs)
                outputs = outputs.view(5, -1, 3)
                outputs = torch.mean(outputs, 0)
            elif crop_type == 10:
                inputs = inputs.permute(1, 0, 2, 3, 4).contiguous()
                inputs = inputs.view(-1, 3, 224, 224)
                outputs = model.forward(inputs, kdatas)
                #outputs = model.forward(inputs)
                outputs = outputs.view(10, -1, 3)
                outputs = torch.mean(outputs, 0)

            #outputs = outputs[sequence_length - 1::sequence_length]
            _, preds = torch.max(outputs.data, -1)
            #_, yp = torch.max(y.data, 1)
            # NOTE(review): `num` here is left over from the train loop
            print(num)
            print(preds[-1])
            print(labels)
            loss1 = criterion(outputs[0], labels)
            loss2 = criterion(outputs[1], labels)
            loss = 0.2 * loss1 + 0.8 * loss2
            #loss = 0.05 * loss1 + 0.15 * loss2 + 0.3 * loss3 + 0.5 * loss4
            #loss = 0.05 * loss1 + 0.1 * loss2 + 0.25 * loss3 + 0.6 * loss4
            val_loss += loss.data
            val_corrects1 += torch.sum(preds[-2] == labels.data)
            val_corrects += torch.sum(preds[-1] == labels.data)
            val_num += labels.shape[0]

        val_elapsed_time = time.time() - val_start_time
        #########################
        #Metrics = compute_metrics(labels.data.cpu().numpy(), preds[-1].cpu().numpy())
        #print(Metrics)
        #########################
        val_accuracy1 = val_corrects1.cpu().numpy() / val_num
        val_accuracy = val_corrects.cpu().numpy() / val_num
        val_average_loss = val_loss / val_num

        print('epoch: {:4d}'
              ' train in: {:2.0f}m{:2.0f}s'
              ' train loss: {:4.4f}'
              ' train accu1: {:.4f}'
              ' train accu: {:.4f}'
              ' valid in: {:2.0f}m{:2.0f}s'
              ' valid loss: {:4.4f}'
              ' valid accu1: {:.4f}'
              ' valid accu: {:.4f}'.format(
                  epoch, train_elapsed_time // 60, train_elapsed_time % 60,
                  train_average_loss, train_accuracy1, train_accuracy,
                  val_elapsed_time // 60, val_elapsed_time % 60,
                  val_average_loss, val_accuracy1, val_accuracy))

        if optimizer_choice == 0:
            if sgd_adjust_lr == 0:
                exp_lr_scheduler.step()
            elif sgd_adjust_lr == 1:
                exp_lr_scheduler.step(val_average_loss)

        # Track the best validation accuracy; ties are broken in favor of
        # the higher corresponding train accuracy.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            correspond_train_acc = train_accuracy
            best_model_wts = copy.deepcopy(model.state_dict())
        if val_accuracy == best_val_accuracy:
            if train_accuracy > correspond_train_acc:
                correspond_train_acc = train_accuracy
                best_model_wts = copy.deepcopy(model.state_dict())

        # per-epoch record: [train_acc, train_loss, val_acc1, val_acc, val_loss]
        record_np[epoch, 0] = train_accuracy
        record_np[epoch, 1] = train_average_loss
        record_np[epoch, 2] = val_accuracy1
        record_np[epoch, 3] = val_accuracy
        record_np[epoch, 4] = val_average_loss
        np.save(str(epoch) + '.npy', record_np)

    print('best accuracy: {:.4f} cor train accu: {:.4f}'.format(
        best_val_accuracy, correspond_train_acc))

    # Encode accuracies into the file names (x10000, as integers).
    save_val = int("{:4.0f}".format(best_val_accuracy * 10000))
    save_train = int("{:4.0f}".format(correspond_train_acc * 10000))
    model_name = "lstm" \
                 + "_epoch_" + str(epochs) \
                 + "_length_" + str(sequence_length) \
                 + "_opt_" + str(optimizer_choice) \
                 + "_mulopt_" + str(multi_optim) \
                 + "_flip_" + str(use_flip) \
                 + "_crop_" + str(crop_type) \
                 + "_batch_" + str(train_batch_size) \
                 + "_train_" + str(save_train) \
                 + "_val_" + str(save_val) \
                 + ".pth"

    torch.save(best_model_wts, model_name)

    record_name = "lstm" \
                  + "_epoch_" + str(epochs) \
                  + "_length_" + str(sequence_length) \
                  + "_opt_" + str(optimizer_choice) \
                  + "_mulopt_" + str(multi_optim) \
                  + "_flip_" + str(use_flip) \
                  + "_crop_" + str(crop_type) \
                  + "_batch_" + str(train_batch_size) \
                  + "_train_" + str(save_train) \
                  + "_val_" + str(save_val) \
                  + ".npy"
    np.save(record_name, record_np)