def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    # Get the image ids of the whole COCO training set (iteration 100 stores the full list).
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    # Fractional arguments are interpreted as fractions of the whole training set.
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # Derive the current iteration from the saved index files
    # (e.g. if file 0 exists, the current iter_num is 1).
    iter_num = get_iter(project_id=project_id) - 1
    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        if n != iter_num:
            continue

        # Init seg_model on the subset selected so far.
        selected_image_files = read_img_list(project_id=project_id, iteration=iter_num)
        train_size_this_iter = len(selected_image_files)
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=train_size_this_iter,
            resume_or_load=resume_or_load
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files
        )
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=iter_num)

        # Use the trained model to compute per-image losses for sampling.
        losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'],
                                            image_root=coco_data[0]['image_root'])
        whole_image_id_list = read_img_list(project_id=project_id, iteration=100)

        # Init sampler: pick the images with the highest loss next.
        sampler = LossSampler(sampler_name='increase_loss')
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files,
                                         losses=losses, loss_decrease=False)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")
        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=n + 1, img_id_list=selected_image_files)
        print("save {} image ids for iter {}".format(len(selected_image_files), n + 1))
        print('in {} iter'.format(n))
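# For reference, a minimal sketch of the loss-based selection the LossSampler
# above is used for. This is NOT the repo's implementation; it assumes `losses`
# maps each image id to a scalar loss, and simply takes the n unselected images
# with the highest loss.
def select_batch_by_loss_sketch(n_sample, already_selected, losses, loss_decrease=False):
    selected = set(already_selected)
    candidates = [(img_id, loss) for img_id, loss in losses.items()
                  if img_id not in selected]
    # loss_decrease=False keeps the hardest (highest-loss) images first
    candidates.sort(key=lambda pair: pair[1], reverse=not loss_decrease)
    return [img_id for img_id, _ in candidates[:n_sample]]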
def reset_seg_model(seg_model, coco_data):
    args = seg_model.args
    project_id = seg_model.project_id
    resume_or_load = seg_model.resume_or_load
    del seg_model
    new_seg_model = CoCoSegModel(args, project_id, coco_data, resume_or_load)
    return new_seg_model
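# Illustrative usage of reset_seg_model (args/coco_data assumed to exist as in
# the __main__ blocks below): rebuild the model between active-learning rounds
# so each round starts from fresh weights instead of fine-tuning the last round.
#
#     seg_model = CoCoSegModel(args, 'demo', coco_data, resume_or_load=False)
#     for round_idx in range(3):
#         ...  # select a subset and train via fit_on_subset
#         seg_model = reset_seg_model(seg_model, coco_data)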
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch):
    """
    Check whether the whole-dataset image-id list (file 100) exists in the
    OUTPUT_DIR/selected_img_list/project_id directory; if not, save it first.
    File 100 holds the image ids of the whole dataset;
    file 0 holds the image ids randomly selected in this seed iteration.
    """
    dir = OUTPUT_DIR + '/' + 'selected_img_list' + '/' + project_id
    if not os.path.exists(dir):
        os.makedirs(dir)
    file = dir + '/' + str(100)
    if not os.path.exists(file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = [item['image_id'] for item in data_loader.dataset._dataset._lst]
        save_img_list(project_id=project_id, iteration=100, img_id_list=image_files_list)
        print("run the function train_seed again")
    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        # A fractional seed_batch is interpreted as a fraction of the whole training set.
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)
        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images".format(seed_batch, whole_train_size))
        save_img_list(project_id=project_id, iteration=0, img_id_list=selected_image_files)
        print("saved the image ids randomly selected in iter 0")
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=0)
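# Illustrative driver for train_seed (argument values are examples only): the
# first run only dumps the whole-dataset id list (file 100) and prints
# "run the function train_seed again"; the second run samples the seed set and
# trains on it.
#
#     args = default_argument_parser().parse_args()
#     train_seed(args, project_id='loss_sample', coco_data=debug_data,
#                resume_or_load=False, seed_batch=0.2)  # 0.2 -> 20% of the data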
        assert len(new_batch) == n_sample
        # reset model if necessary
        ins_seg_model.reset_model()

if __name__ == '__main__':
    """
    Train base models separately on 20%, 30%, ..., 100% of the data.
    """
    coco_data = debug_data
    args = default_argument_parser().parse_args()
    seg_model = CoCoSegModel(args, project_id='Base', coco_data=coco_data, resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = [
        item['image_id'] for item in data_loader.dataset._dataset._lst
    ]
    generate_base_model(whole_image_id=whole_image_id, coco_data=coco_data,
                        ins_seg_model=seg_model, seed_batch=0.2, batch_size=0.1)
    """
    Load the trained base models and use them with fit_on_single_data to get
    the score_list; the score_list will be saved as OUTPUT_DIR/file/score_list.
    """
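# Sketch of the subset-size schedule implied by seed_batch=0.2 and
# batch_size=0.1 above: 20%, 30%, ..., 100% of the training set. The helper is
# illustrative only (whole_train_size is assumed known).
import numpy as np

def subset_sizes_sketch(whole_train_size, seed_batch=0.2, batch_size=0.1):
    seed = int(seed_batch * whole_train_size)
    step = int(batch_size * whole_train_size)
    n_batches = int(np.ceil((whole_train_size - seed) / step)) + 1
    return [min(seed + n * step, whole_train_size) for n in range(n_batches)]

# e.g. subset_sizes_sketch(1000) -> [200, 300, 400, 500, 600, 700, 800, 900, 1000]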
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        # reset model
        print("--reset model")
        ins_seg_model.reset_model()

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    project_id = "random"
    seg_model = CoCoSegModel(args, project_id=project_id, coco_data=debug_data,
                             resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = [item['image_id'] for item in data_loader.dataset._dataset._lst]
    randomsampler = CoCoRandomSampler("random_sampler", whole_image_id=whole_image_id)
    generate_one_curve(
        coco_data=copy.deepcopy(debug_data),
        whole_image_id=copy.deepcopy(whole_image_id),
        sampler=randomsampler,
        ins_seg_model=seg_model,
        batch_size=100,
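# Illustrative stand-in for what CoCoRandomSampler.select_batch plausibly does
# (the real class lives elsewhere in the repo): uniformly sample n image ids
# from the pool that have not been selected yet.
import random

def select_random_batch_sketch(whole_image_id, already_selected, n_sample):
    selected = set(already_selected)
    pool = [img_id for img_id in whole_image_id if img_id not in selected]
    return random.sample(pool, min(n_sample, len(pool)))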
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    """
    Check whether the whole-dataset image-id list (file 100) exists in the
    OUTPUT_DIR/selected_img_list/project_id directory; if not, save it first.
    File 100 holds the image ids of the whole dataset;
    file 0 holds the image ids randomly selected in this seed iteration.
    """
    dir = OUTPUT_DIR + '/' + 'selected_img_list' + '/' + project_id
    if not os.path.exists(dir):
        os.makedirs(dir)
    file = dir + '/' + str(100)
    if not os.path.exists(file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = [item['image_id'] for item in data_loader.dataset._dataset._lst]
        save_img_list(project_id=project_id, iteration=100, img_id_list=image_files_list)
        print("run the function train_seed again")
    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        # Fractional arguments are interpreted as fractions of the whole training set.
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)
        if batch_size < 1:
            batch_size = int(batch_size * whole_train_size)
        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images".format(seed_batch, whole_train_size))
        save_img_list(project_id=project_id, iteration=0, img_id_list=selected_image_files)
        print("saved the image ids randomly selected in iter 0")
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files
        )
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=0)

        # Use the trained model to compute per-image losses.
        losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'],
                                            image_root=coco_data[0]['image_root'])
        whole_image_id_list = read_img_list(project_id=project_id, iteration=100)

        # Init sampler: pick the images with the highest loss next.
        sampler = LossSampler(sampler_name='increase_loss')
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files,
                                         losses=losses, loss_decrease=False)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")
        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=1, img_id_list=selected_image_files)
        print("save {} image ids for iter 1".format(len(selected_image_files)))
        assert len(new_batch) == n_sample
        # reset model if necessary
        ins_seg_model.reset_model()
    results['mious'] = mious
    results['data_sizes'] = data_sizes
    print(results)

if __name__ == "__main__":
    coco_data = debug_data
    args = default_argument_parser().parse_args()
    seg_model = CoCoSegModel(args, project_id='self_paced_with_diversity',
                             coco_data=coco_data, resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = [
        item['image_id'] for item in data_loader.dataset._dataset._lst
    ]
    # wait until the VAE features have been generated
    while not os.path.exists(VAE_feature_path):
        print('waiting for VAE feature')
        time.sleep(15)
    print('the VAE feature has been generated')
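# Sketch: plot the learning curve accumulated in `results` above
# (data_sizes vs. mious). Assumes matplotlib is available; the output
# file name is arbitrary.
import matplotlib.pyplot as plt

def plot_learning_curve_sketch(results, out_path='learning_curve.png'):
    plt.plot(results['data_sizes'], results['mious'], marker='o')
    plt.xlabel('number of labeled images')
    plt.ylabel('mIoU')
    plt.title('active learning curve')
    plt.savefig(out_path)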
        assert len(new_batch_1) == n_sample
        assert len(new_batch_2) == n_sample
        # reset models if necessary
        ins_seg_model_1.reset_model()
        ins_seg_model_2.reset_model()

if __name__ == "__main__":
    data = debug_data
    args = default_argument_parser().parse_args()
    seg_model_1 = CoCoSegModel(args, project_id='co_teaching_model_1', coco_data=data,
                               model_config='Mask_RCNN2', resume_or_load=True)
    seg_model_2 = CoCoSegModel(args, project_id='co_teaching_model_2', coco_data=data,
                               model_config='Mask_RCNN', resume_or_load=True)
    data_loader_1 = seg_model_1.trainer.data_loader
    data_loader_2 = seg_model_2.trainer.data_loader
    whole_image_id_1 = []
    index_list = data_loader_1.dataset._dataset._lst
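# Sketch of the classic co-teaching exchange that the two models above suggest:
# each model ranks samples by its own loss and hands its small-loss picks to
# its peer. Illustrative only; the repo's actual selection logic may differ.
# losses_1/losses_2 are assumed to map image_id -> scalar loss.
def co_teaching_select_sketch(losses_1, losses_2, already_selected, n_sample):
    selected = set(already_selected)

    def small_loss_batch(losses):
        pool = [(i, l) for i, l in losses.items() if i not in selected]
        pool.sort(key=lambda pair: pair[1])  # ascending: small loss first
        return [i for i, _ in pool[:n_sample]]

    new_batch_1 = small_loss_batch(losses_2)  # model 2 picks data for model 1
    new_batch_2 = small_loss_batch(losses_1)  # model 1 picks data for model 2
    return new_batch_1, new_batch_2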
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False

if __name__ == '__main__':
    seed_torch()
    # Check whether every image can be read correctly.
    # Result: COCO_train2014_000000167126.jpg load error!
    # check_file("/home/muyun99/Downloads/Tsne/adsampler/imageid/all")

    # initialize seg_model and get the whole_image_id
    args = default_argument_parser().parse_args()
    seg_model = CoCoSegModel(args, project_id='adversely', coco_data=coco_data,
                             resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = [item['image_id'] for item in data_loader.dataset._dataset._lst]
    # ids of images that fail to load go into error_imgid
    error_imgid = [167126]
    trainer = Adversary_sampler_trainer(whole_image_id=whole_image_id)
    # load the pretrained VAE and discriminator weights
    trainer.load_weight(
        vae_weight=os.path.join(WEIGHT_path, "vae_model_14912_2500.pth"),
        dis_weight=os.path.join(WEIGHT_path, "dis_model_14912_2500.pth")
    )
    # build the dataloader used for training
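# The fragment above shows only the tail of seed_torch; a full version of this
# common reproducibility helper typically also seeds Python, NumPy, and the
# CPU RNG, e.g.:
#
#     import os, random
#     import numpy as np
#     import torch
#
#     def seed_torch(seed=42):
#         random.seed(seed)
#         os.environ['PYTHONHASHSEED'] = str(seed)
#         np.random.seed(seed)
#         torch.manual_seed(seed)
#         torch.cuda.manual_seed(seed)
#         torch.cuda.manual_seed_all(seed)
#         torch.backends.cudnn.deterministic = True
#         torch.backends.cudnn.benchmark = False
#         torch.backends.cudnn.enabled = False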
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    # Get the image ids of the whole COCO training set (iteration 100 stores the full list).
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    # Fractional arguments are interpreted as fractions of the whole training set.
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # Derive the current iteration from the saved index files
    # (e.g. if file 0 exists, the current iter_num is 1).
    iter_num = get_iter(project_id=project_id)
    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        if n != iter_num:
            continue

        # Init seg_model on the subset selected so far.
        selected_image_files = read_img_list(project_id=project_id, iteration=iter_num - 1)
        train_size_this_iter = seed_batch + min(
            whole_train_size - len(selected_image_files), n * batch_size)
        ins_seg_model = CoCoSegModel(args=args,
                                     project_id=project_id,
                                     coco_data=coco_data,
                                     train_size=train_size_this_iter,
                                     resume_or_load=resume_or_load)
        data_loader = ins_seg_model.trainer.data_loader
        mask_feature = ins_seg_model.save_mask_features(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'])

        # Init sampler: coreset selection over the saved mask features.
        # sampler = CoCoRandomSampler('random_sampler', data_loader)
        sampler = CoreSetSampler('coreset_sampler', mask_feature)
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")
        print("selected {} new images in {} iter, {} images used to train".format(
            n_sample, n, train_size_this_iter))
        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=n, img_id_list=selected_image_files)
        print("save {} image ids".format(len(selected_image_files)))
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert train_size_this_iter == len(selected_image_files)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=iter_num)
        print('in {} iter'.format(n))
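# Illustrative k-center-greedy selection, the usual algorithm behind a coreset
# sampler (the repo's CoreSetSampler may differ in details). `features` is
# assumed to map image_id -> 1-D feature vector, e.g. the saved mask features.
import numpy as np

def k_center_greedy_sketch(features, already_selected, n_sample):
    ids = list(features.keys())
    feats = np.stack([np.asarray(features[i], dtype=np.float32) for i in ids])
    centers = [ids.index(i) for i in already_selected if i in features]
    if centers:
        # distance from every point to its nearest already-selected center
        dists = np.min(np.linalg.norm(
            feats[:, None, :] - feats[centers][None, :, :], axis=2), axis=1)
    else:
        dists = np.full(len(ids), np.inf)
    new_batch = []
    for _ in range(n_sample):
        idx = int(np.argmax(dists))  # farthest point from all current centers
        new_batch.append(ids[idx])
        dists = np.minimum(dists, np.linalg.norm(feats - feats[idx], axis=1))
        dists[idx] = 0.0             # never pick the same point twice
    return new_batch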