def get_statis(food_dataset_root, ct, img_set, cls_img_set):
    """Dump per-category statistics of one canteen image set to a text file.

    The result is written to
    ``./statistics/{ct}_{img_set}_{cls_img_set}_static.txt``; one line is
    emitted per category considered (tab-separated id, statistic, Chinese
    name, English name), or an empty line when the category has no entry in
    the collected statistics.

    :param food_dataset_root: dataset root; concatenated directly with
        ``"Food_{ct}/..."``, so it is expected to end with a path separator
    :param ct: canteen name
    :param img_set: name of the image set whose xml files are counted
    :param cls_img_set: name of the category set that selects which
        categories are reported
    """
    imageset_file = food_dataset_root + "Food_{}/ImageSets/{}.txt".format(
        ct, img_set)
    annotation_dir = food_dataset_root + "Food_{}/Annotations".format(ct)
    # presumably an iterable of (category id, count) pairs with int ids,
    # given the int() lookups below -- confirm against get_xml_from_file
    stats_by_id = dict(get_xml_from_file(imageset_file, annotation_dir))

    print("-------processing {} {}-----------".format(ct, img_set))

    allowed = get_categories(ct + '_' + img_set)
    report_path = "./statistics/{}_{}_{}_static.txt".format(
        ct, img_set, cls_img_set)
    with open(report_path, 'w') as report:
        # The first entry of the category list is skipped
        # (presumably a background class -- TODO confirm).
        for category in get_categories(cls_img_set)[1:]:
            in_scope = allowed is None or category in allowed
            if not in_scope:
                continue
            key = int(category)
            if key not in stats_by_id:
                # Keep a placeholder row for categories without statistics.
                report.write("\n")
                continue
            fields = [
                str(key),
                str(stats_by_id[key]),
                id2chn[str(key)],
                id2eng[str(key)],
            ]
            report.write('\t'.join(fields) + '\n')
def create_mtN_imageset(canteen, imgset, N: int):
    """Create the ``{imgset}mt{N}.txt`` image set of one canteen.

    Reads ``../data/Food/Food_{canteen}/ImageSets/{imgset}.txt``, parses the
    annotation of every listed image and keeps the images that contain at
    least one object whose name is in the matching training category set
    (``{canteen}_train_mt{N}``, or ``{canteen}_train`` when ``N`` is 0).
    The kept image ids are written, one per line, to
    ``{imgset}mt{N}.txt`` in the same ImageSets directory.

    :param canteen: canteen name
    :param imgset: source image set; must be 'train', 'val' or 'test'
    :param N: "more than N" threshold selecting the category set; 0 selects
        the plain training categories
    :type N: int
    :raises ValueError: if ``imgset`` is not one of the supported names
    """
    # The previous guard chained `!=` comparisons with `or`, which is true
    # for every value and therefore never rejected anything.
    supported = ('train', 'val', 'test')
    if imgset not in supported:
        raise ValueError(
            "imgset must be one of {}, got {!r}".format(supported, imgset))

    print("---processing {} mt {} {} ------".format(canteen, imgset, N))
    imgsets_path = "../data/Food/Food_{}/ImageSets".format(canteen)
    anno_path = "../data/Food/Food_{}/Annotations".format(canteen)

    # Format the filename itself, not the already-joined path.
    with open(os.path.join(imgsets_path, "{}.txt".format(imgset)), 'r') as f:
        image_ids = [line.strip("\n") for line in f]

    # Only keep samples of the training categories whose count exceeds N.
    # The category set does not depend on the image, so look it up once
    # instead of once per annotated object (assumes get_categories is a
    # plain lookup without side effects).
    # NOTE(review): the key uses "_train_mt{N}" while the excl* sets elsewhere
    # in this file use "_trainmt{N}" -- confirm this spelling is intended.
    if N != 0:
        match_categories = get_categories(canteen + "_train_mt{}".format(N))
    else:
        match_categories = get_categories(canteen + "_train")

    content = []
    for image_id in image_ids:
        objects = parse_rec(os.path.join(anno_path, image_id + '.xml'))
        if any(obj['name'] in match_categories for obj in objects):
            # Write the id exactly as read; splitting on "." would truncate
            # ids that themselves contain a dot.
            content.append(image_id + '\n')

    print("saving {} sets:{}_mt{}".format(imgset, len(content), N))
    with open(os.path.join(imgsets_path,
                           "{}mt{}.txt".format(imgset, N)), 'w') as f:
        f.writelines(content)
def create_inner_imageset(ct, excl_train_mtN):
    '''Write the "inner" image set of one canteen.

    inner is the inner set between train of excl{dataset} and trainval of
    {dataset}.  The annotation files of ``ct`` are filtered against the
    categories of ``excl{ct}_train`` (or ``excl{ct}_trainmt{N}`` when
    ``excl_train_mtN`` is non-zero) and the base names of the reserved files
    are written to ``inner.txt`` / ``innermt{N}.txt`` in the canteen's
    ImageSets directory.

    :param ct: canteen name
    :param excl_train_mtN: threshold N of the excl-train category set; 0
        selects the plain ``excl{ct}_train`` set
    '''
    print("------processing {}-----------".format(ct))
    imgsets_path = "../data/Food/Food_{}/ImageSets".format(ct)
    anno_path = "../data/Food/Food_{}/Annotations".format(ct)

    use_plain_train = excl_train_mtN == 0
    category_key = (
        "excl"+ct+"_train" if use_plain_train
        else "excl"+ct+"_trainmt{}".format(excl_train_mtN)
    )
    excl_class = get_categories(category_key)

    # Filter the xml files through a callback; the extra state the callback
    # needs is kept on a class instance.
    xml_filter = filter_xml(excl_class)
    process_all_xml_files_from_dir(anno_path, xml_filter.process)
    print(len(xml_filter.reserver_xmls))
    reserved = xml_filter.reserver_xmls

    # Save the filtered result.
    print("saving inner mt {} sets:{}".format(excl_train_mtN, len(reserved)))
    print(imgsets_path)
    out_name = (
        "inner.txt" if use_plain_train
        else "innermt{}.txt".format(excl_train_mtN)
    )
    with open(os.path.join(imgsets_path, out_name), 'w') as out:
        for xml_path in reserved:
            # Strip the directory and the extension, keeping the image id.
            stem = os.path.splitext(os.path.basename(xml_path))[0]
            out.write(stem + '\n')
def create_inner_imagesets():
    '''Write the "inner" image sets for every canteen and every threshold.

    inner is the inner set between train of excl{dataset} and trainval of
    {dataset}.  For each canteen and each N in (0, 10, 30, 50, 100) the
    annotation files are filtered against the ``excl{ct}_train`` /
    ``excl{ct}_trainmt{N}`` categories, and the base names of the selected
    files are written to ``inner.txt`` / ``innermt{N}.txt`` in that
    canteen's ImageSets directory.
    '''
    canteens = ['Arts', 'Science', 'TechMixedVeg',
                'TechChicken', 'UTown', 'YIH']
    thresholds = [0, 10, 30, 50, 100]

    for ct in canteens:
        print("------processing {}-----------".format(ct))
        imgsets_path = "../data/Food/Food_{}/ImageSets".format(ct)
        anno_path = "../data/Food/Food_{}/Annotations".format(ct)

        for N in thresholds:
            plain_train = N == 0
            if plain_train:
                category_key = "excl"+ct+"_train"
            else:
                category_key = "excl"+ct+"_trainmt{}".format(N)
            excl_class = get_categories(category_key)

            # Three ways to filter the xml files through a callback:
            #   1. keep the callback's extra state on a class instance
            #      (filter_xml, see create_inner_imageset);
            #   2. keep it in a closure -- the approach used here;
            #   3. via a coroutine -- not implemented.
            callback = filter_clo(excl_class)
            process_all_xml_files_from_dir(anno_path, callback)
            # __closure__ is a tuple of cell objects; a cell exposes the
            # captured value through cell_contents.  The first cell is read
            # as the collected result.
            first_cell = callback.__closure__[0]
            selected = first_cell.cell_contents

            # Save the filtered result.
            print("saving inner mt {} sets:{}".format(N, len(selected)))
            print(imgsets_path)
            out_name = "inner.txt" if plain_train else "innermt{}.txt".format(N)
            with open(os.path.join(imgsets_path, out_name), 'w') as out:
                for xml_path in selected:
                    base_name = os.path.basename(xml_path)
                    stem, _ext = os.path.splitext(base_name)
                    out.write(stem + '\n')
def create_few_inner_for_cross_domain(ct, imgset, mtN, fewN):
    """create_few_inner_for_cross_domain

    Select few-shot samples from an image set of a canteen and save the
    resulting split: the xml names the filter reserves are saved as the
    'train' file and the ones it discards as the 'val' file, both named
    ``innermt10valfew{fewN}[mt{mtN}]{train|val}.txt`` inside the canteen's
    ImageSets directory.

    :param ct: canteen name
    :param imgset: name of the image set (without ``.txt``) to select from
    :param mtN: N of mt which means the number of training sample is more
        than N; 0 selects the plain ``excl{ct}_train`` categories
    :param fewN: the number of selected sample for each categories
    """
    print("------processing {}-selecting few inner--------".format(ct))
    imgsets_path = "../data/Food/Food_{}/ImageSets".format(ct)
    anno_path = "../data/Food/Food_{}/Annotations".format(ct)
    imset_path = os.path.join(imgsets_path, imgset+'.txt')

    category_key = (
        "excl"+ct+"_train" if mtN == 0
        else "excl"+ct+"_trainmt{}".format(mtN)
    )
    excl_classes = get_categories(category_key)

    # Per-category counter, starting at zero; the first entry of the
    # category list is skipped.
    cls_sample_count = {category: 0 for category in excl_classes[1:]}
    few_filter = Xml_in_few_sample_filter(cls_sample_count, fewN)
    dishes = create_dishes(ct, 'innermt10val')
    process_xml_from_file(imset_path, anno_path, few_filter.process)

    def _write_split(xmls, split):
        # Save one side of the split, one xml name per line.
        print("saving inner few{} mt {} {} sets:{}".format(
            fewN, mtN, split, len(xmls)))
        out_name = (
            "innermt10valfew{}{}.txt".format(fewN, split) if mtN == 0
            else "innermt10valfew{}mt{}{}.txt".format(fewN, mtN, split)
        )
        with open(os.path.join(imgsets_path, out_name), 'w') as out:
            for name in xmls:
                out.write(name + '\n')

    few_filter.clean_discard_by_dishes(dishes)
    _write_split(few_filter.reserver_xmls, 'train')
    _write_split(few_filter.discard_xmls, 'val')