import copy
import os
import random
import time

import numpy as np

# Project-level names used below (CoCoSegModel, OUTPUT_DIR, the sampler classes,
# register_coco_instances_from_selected_image_files, and the save_img_list /
# read_img_list / get_iter helpers) come from this repo's own modules.


def generate_base_model(whole_image_id, coco_data, ins_seg_model, seed_batch, batch_size):
    """
    Generate the baseline models: train separately on 20%, 30%, 40%, 50%, ..., 100%
    of the data. The data is randomly selected, and the eval results are saved as
    the baseline.
    """
    # initialize the quantity relationship
    whole_train_size = len(whole_image_id)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # initialize the random sampler
    sampler = CoCoRandomSampler(sampler_name='random', whole_image_id=whole_image_id)

    # initially, seed_batch images are selected at random
    selected_image_id = random.sample(whole_image_id, seed_batch)

    # register the dataset and build the data loader
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_id)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        # check the training-set size in this iteration
        n_train_size = seed_batch + min((whole_train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train_size, n))
        assert n_train_size == len(selected_image_id)

        ins_seg_model.save_selected_image_id(selected_image_id)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files)

        n_sample = min(batch_size, whole_train_size - len(selected_image_id))
        new_batch = sampler.select_batch(n_sample, already_selected=selected_image_id)
        selected_image_id.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        # re-register the dataset and rebuild the data loader
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

        # reset the model before training on the enlarged subset
        ins_seg_model.reset_model()
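# The fraction-to-count conversion and the n_batches formula above fix the whole
# sampling schedule. A minimal standalone sketch with hypothetical sizes (1000
# training images, 20% seed, 10% per batch) of how the training-set size grows:
def _schedule_example():
    whole_train_size, seed_batch, batch_size = 1000, 200, 100
    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1  # 9
    for n in range(n_batches):
        n_train_size = seed_batch + min(whole_train_size - seed_batch, n * batch_size)
        print(n, n_train_size)  # 200, 300, 400, ..., 1000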
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch):
    """
    Check whether the full image-id list (file "100") exists in
    OUTPUT_DIR/selected_img_list/project_id; if not, save it first.
    File "100" holds the image-id list of the whole dataset;
    file "0" holds the image ids randomly selected in this seed iteration.
    """
    dir = OUTPUT_DIR + '/' + 'selected_img_list' + '/' + project_id
    if not os.path.exists(dir):
        os.makedirs(dir)
    file = dir + '/' + str(100)
    if not os.path.exists(file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = []
        index_list = data_loader.dataset._dataset._lst
        for item in index_list:
            image_files_list.append(item['image_id'])
        save_img_list(project_id=project_id, iteration=100, img_id_list=image_files_list)
        print("run the function train_seed again")
    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)
        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images".format(seed_batch, whole_train_size))
        save_img_list(project_id=project_id, iteration=0, img_id_list=selected_image_files)
        print("saved the image ids randomly selected in iter 0")
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=0)
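# save_img_list / read_img_list / get_iter are project helpers that are not shown
# here. A minimal sketch of what they might look like, assuming (hypothetically)
# one JSON file per iteration under OUTPUT_DIR/selected_img_list/<project_id>:
import json

def _save_img_list(project_id, iteration, img_id_list):
    dir = os.path.join(OUTPUT_DIR, 'selected_img_list', project_id)
    os.makedirs(dir, exist_ok=True)
    with open(os.path.join(dir, str(iteration)), 'w') as f:
        json.dump(img_id_list, f)

def _read_img_list(project_id, iteration):
    dir = os.path.join(OUTPUT_DIR, 'selected_img_list', project_id)
    with open(os.path.join(dir, str(iteration))) as f:
        return json.load(f)

def _get_iter(project_id):
    # number of saved per-iteration files, excluding the whole-dataset file "100"
    dir = os.path.join(OUTPUT_DIR, 'selected_img_list', project_id)
    return len([f for f in os.listdir(dir) if f != '100'])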
def generate_one_curve(whole_image_id, coco_data, sampler, ins_seg_model, seed_batch, batch_size):
    # initialize the quantity relationship
    whole_train_size = len(whole_image_id)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # initially, seed_batch images are selected at random
    selected_image_id = random.sample(whole_image_id, seed_batch)

    # register the dataset and build the data loader
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_id)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        # check the training-set size in this iteration
        n_train_size = seed_batch + min((whole_train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train_size, n))
        assert n_train_size == len(selected_image_id)

        ins_seg_model.save_selected_image_id(selected_image_id)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files)

        # get the per-image losses for the loss sampler
        losses = ins_seg_model.compute_loss(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
        )
        n_sample = min(batch_size, whole_train_size - len(selected_image_id))
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_id,
                                         losses=losses,
                                         loss_decrease=False)
        selected_image_id.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        # re-register the dataset and rebuild the data loader
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

        # reset the model before training on the enlarged subset
        ins_seg_model.reset_model()
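# The loss sampler's ranking rule is not shown in this file. A minimal sketch of
# the top-loss selection it plausibly implements, assuming (hypothetically) that
# `losses` is a list of {'image_id': ..., 'loss': ...} dicts:
def _select_top_loss(n_sample, already_selected, losses, loss_decrease=False):
    already = set(already_selected)
    candidates = [e for e in losses if e['image_id'] not in already]
    # loss_decrease=False means highest-loss images are taken first
    candidates.sort(key=lambda e: e['loss'], reverse=not loss_decrease)
    return [e['image_id'] for e in candidates[:n_sample]]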
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    # get all the image ids of the coco training set
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # get the current iter_num from the saved index files
    # (e.g. if file 0 exists, get_iter returns 1)
    iter_num = get_iter(project_id=project_id) - 1

    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        if n != iter_num:
            continue
        # init the segmentation model
        selected_image_files = read_img_list(project_id=project_id, iteration=iter_num)
        train_size_this_iter = len(selected_image_files)
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=train_size_this_iter,
            resume_or_load=resume_or_load)
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=iter_num)

        losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'],
                                            image_root=coco_data[0]['image_root'])
        whole_image_id_list = read_img_list(project_id=project_id, iteration=100)

        # init the sampler
        sampler = LossSampler(sampler_name='increase_loss')
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_files,
                                         losses=losses,
                                         loss_decrease=False)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")
        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=n + 1, img_id_list=selected_image_files)
        print("saved {} image ids for iter {}".format(len(selected_image_files), n + 1))
        print('in {} iter'.format(n))
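# train_on_batch runs exactly one active-learning iteration per call (the one
# matching the saved index files), so the outer loop is typically driven by
# re-invoking the script. A hypothetical driver, assuming train_seed has already
# produced files 100 and 0 (resume_or_load and the fractions are placeholders):
def _drive_loss_sampling(args, project_id, coco_data, n_batches):
    for _ in range(n_batches):
        # each call reads the latest saved id list, trains, and saves the next one
        train_on_batch(args, project_id, coco_data,
                       resume_or_load=True, seed_batch=0.2, batch_size=0.1)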
def fit_on_single_data(self, image_id_list):
    """
    For each image in image_id_list, build a data loader and fine-tune the base
    model on that single image; the resulting AP gain over the base model is used
    as the image's score.
    Returns a list of dicts of the form {'image_id': int, 'score': float}.
    """
    score_list = []
    base_model = copy.deepcopy(self.model)
    result = self.test()
    base_score = result['segm']['AP']
    for image_id in image_id_list:
        dic = {'image_id': image_id}
        image_id = [image_id]
        # coco_data is assumed to be available at module level here
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=image_id)
        data_loader, l = self.trainer.re_build_train_loader(
            'coco_from_selected_image', images_per_batch=1)
        self.trainer.data_loader = data_loader
        self.trainer._data_loader_iter = iter(data_loader)
        # fine-tune for 20 iterations on the single image
        self.trainer.max_iter = 20
        result = self.trainer.train()
        dic['score'] = result['segm']['AP'] - base_score
        score_list.append(dic)
        # roll back to the base model before scoring the next image
        self.back_to_base_model(base_model=base_model)
    # save the score list
    self.save_score_list(score_list)
    return score_list
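# A sketch of how the returned score_list might be consumed, keeping the k images
# whose single-image fine-tuning improved AP the most (k and the call site are
# hypothetical; the method is assumed to live on the segmentation model object):
def _top_k_by_promotion(ins_seg_model, candidate_image_ids, k):
    score_list = ins_seg_model.fit_on_single_data(candidate_image_ids)
    top_k = sorted(score_list, key=lambda d: d['score'], reverse=True)[:k]
    return [d['image_id'] for d in top_k]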
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    """
    Check whether the full image-id list (file "100") exists in
    OUTPUT_DIR/selected_img_list/project_id; if not, save it first.
    File "100" holds the image-id list of the whole dataset;
    file "0" holds the image ids randomly selected in this seed iteration.
    """
    dir = OUTPUT_DIR + '/' + 'selected_img_list' + '/' + project_id
    if not os.path.exists(dir):
        os.makedirs(dir)
    file = dir + '/' + str(100)
    if not os.path.exists(file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = []
        index_list = data_loader.dataset._dataset._lst
        for item in index_list:
            image_files_list.append(item['image_id'])
        save_img_list(project_id=project_id, iteration=100, img_id_list=image_files_list)
        print("run the function train_seed again")
    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)
        if batch_size < 1:
            batch_size = int(batch_size * whole_train_size)
        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images".format(seed_batch, whole_train_size))
        save_img_list(project_id=project_id, iteration=0, img_id_list=selected_image_files)
        print("saved the image ids randomly selected in iter 0")
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=0)

        # use the trained model to get the per-image losses
        losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'],
                                            image_root=coco_data[0]['image_root'])
        whole_image_id_list = read_img_list(project_id=project_id, iteration=100)

        # init the sampler
        sampler = LossSampler(sampler_name='increase_loss')
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_files,
                                         losses=losses,
                                         loss_decrease=False)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")
        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=1, img_id_list=selected_image_files)
        print("saved {} image ids for iter 1".format(len(selected_image_files)))
def generate_one_curve(
        whole_image_id,
        coco_data,
        sampler,
        ins_seg_model,
        seed_batch,
        batch_size,
        image2class,
):
    # initialize the quantity relationship
    whole_train_size = len(whole_image_id)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # initialize the result containers
    results = {}
    data_sizes = []
    mious = []

    # initially, seed_batch images are selected at random
    selected_image_id = random.sample(whole_image_id, seed_batch)

    # register the dataset and build the data loader
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_id)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        # check the training-set size in this iteration
        n_train_size = seed_batch + min((whole_train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train_size, n))
        assert n_train_size == len(selected_image_id)
        data_sizes.append(n_train_size)

        ins_seg_model.save_selected_image_id(selected_image_id)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files)
        miou = ins_seg_model.test()
        mious.append(miou)
        print('miou:{} in {} iter'.format(miou['miou'], n))

        # the mask features extracted by the trained model are used for
        # clustering (KNN); the per-image losses drive the group-aware sampler
        losses = ins_seg_model.compute_loss(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
        )
        n_sample = min(batch_size, whole_train_size - len(selected_image_id))
        new_batch = sampler.select_batch_from_groups(
            n_sample=n_sample,
            already_selected=selected_image_id,
            losses=losses,
            loss_decrease=False,
            image2class=image2class,
        )
        selected_image_id.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        # re-register the dataset and rebuild the data loader
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

        # reset the model before training on the enlarged subset
        ins_seg_model.reset_model()

    results['mious'] = mious
    results['data_sizes'] = data_sizes
    print(results)
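# image2class is consumed by the group-aware sampler above; the comments mention
# clustering the trained model's mask features. A hypothetical sketch building the
# mapping with scikit-learn's KMeans (the feature layout, cluster count, and the
# choice of KMeans over a KNN-style grouping are all assumptions):
def _build_image2class(mask_features, n_clusters=10):
    from sklearn.cluster import KMeans

    # mask_features: {image_id: np.ndarray} -- assumed layout
    ids = list(mask_features)
    X = np.stack([mask_features[i] for i in ids])
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(X)
    return {image_id: int(label) for image_id, label in zip(ids, labels)}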
def generate_one_curve(coco_data, data_loader, sampler, ins_seg_model, seed_batch, batch_size):
    """
    :param data_loader: data loader containing all training data; the sampler
        selects images from it.
    :param sampler: active learning sampler
    :param ins_seg_model: model used to score the samplers; expects fit and test
        methods to be implemented.
    :param seed_batch: float in (0, 1); fraction of the training data used for
        the initial model.
    :param batch_size: float in (0, 1); batch size as a fraction of the training
        data; the sampler selects this many images per iteration.
    """
    # get all the image ids from the data_loader
    image_files_list = []
    index_list = data_loader.dataset._dataset._lst
    for item in index_list:
        image_files_list.append(item['image_id'])

    # the size of the entire training set
    train_size = len(image_files_list)

    # convert seed_batch and batch_size from fractions of the training set to counts
    seed_batch = int(seed_batch * train_size)
    batch_size = int(batch_size * train_size)

    # record the training and testing results after each data sampling step
    results = {}
    data_sizes = []
    mious = []

    # initially, seed_batch images are selected at random
    selected_image_files = random.sample(image_files_list, seed_batch)
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_files)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    # n_batches iterations are needed to sample the whole training set
    n_batches = int(np.ceil((train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        n_train = seed_batch + min((train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train, n))
        assert n_train == len(selected_image_files)
        data_sizes.append(n_train)

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, n)
        miou = ins_seg_model.test()
        mious.append(miou)
        print('miou:{} in {} iter'.format(miou['miou'], n))

        # get the per-image losses for the loss sampler
        losses = ins_seg_model.compute_loss(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
        )
        n_sample = min(batch_size, train_size - len(selected_image_files))
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_files,
                                         losses=losses)
        selected_image_files.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

    results['mious'] = mious
    results['data_sizes'] = data_sizes
    results['sampler'] = sampler.sampler_name
    print(results)
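# The samplers used across these curves share a small implicit interface; a
# minimal sketch of that contract, inferred only from the call sites above
# (the base-class name is hypothetical):
class _ALSampler:
    def __init__(self, sampler_name):
        self.sampler_name = sampler_name

    def select_batch(self, n_sample, already_selected, **kwargs):
        # return n_sample image ids not in already_selected; kwargs carries
        # sampler-specific signals such as per-image losses
        raise NotImplementedError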
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    # get all the image ids of the coco training set
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # get the current iter_num from the saved index files
    # (e.g. if file 0 exists, iter_num is 1)
    iter_num = get_iter(project_id=project_id)

    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1
    for n in range(n_batches):
        if n != iter_num:
            continue
        # init the segmentation model
        selected_image_files = read_img_list(project_id=project_id, iteration=iter_num - 1)
        train_size_this_iter = seed_batch + min(
            (whole_train_size - len(selected_image_files)), n * batch_size)
        ins_seg_model = CoCoSegModel(args=args,
                                     project_id=project_id,
                                     coco_data=coco_data,
                                     train_size=train_size_this_iter,
                                     resume_or_load=resume_or_load)
        data_loader = ins_seg_model.trainer.data_loader
        mask_feature = ins_seg_model.save_mask_features(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'])

        # init the sampler
        # sampler = CoCoRandomSampler('random_sampler', data_loader)
        sampler = CoreSetSampler('coreset_sampler', mask_feature)
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")
        print("selected {} new images in {} iter, {} images used to train".format(
            n_sample, n, train_size_this_iter))
        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=n, img_id_list=selected_image_files)
        print("saved {} image ids".format(len(selected_image_files)))

        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert train_size_this_iter == len(selected_image_files)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=iter_num)
        print('in {} iter'.format(n))
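# CoreSetSampler is used above with the saved mask features; a minimal sketch of
# the greedy k-center step that core-set sampling classically performs, assuming
# (hypothetically) that features maps image_id -> np.ndarray:
def _greedy_k_center(n_sample, already_selected, features):
    ids = list(features)
    X = np.stack([features[i] for i in ids])
    # distance of every point to its nearest already-selected point
    dist = np.full(len(ids), np.inf)
    selected = set(already_selected)
    for j, image_id in enumerate(ids):
        if image_id in selected:
            dist = np.minimum(dist, np.linalg.norm(X - X[j], axis=1))
    new_batch = []
    for _ in range(n_sample):
        j = int(np.argmax(dist))  # farthest point from the current selection
        new_batch.append(ids[j])
        dist = np.minimum(dist, np.linalg.norm(X - X[j], axis=1))
    return new_batch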