Exemplo n.º 1
0
def get_statis(food_dataset_root, ct, img_set, cls_img_set):
    """Write per-category statistics of one canteen image set to a text file.

    The statistics are obtained from ``get_xml_from_file`` for the image-set
    list ``Food_<ct>/ImageSets/<img_set>.txt`` and the annotation directory
    ``Food_<ct>/Annotations`` below ``food_dataset_root``. The report is
    written to ``./statistics/<ct>_<img_set>_<cls_img_set>_static.txt`` (the
    directory is created when missing).

    For every category of ``cls_img_set`` except the first entry, one line is
    written: ``id<TAB>value<TAB>chinese name<TAB>english name`` when the
    category id has an entry in the statistics, or an empty line when it has
    none. Categories that are not in the ``<ct>_<img_set>`` category list are
    left out entirely (no line at all).

    :param food_dataset_root: root directory of the food dataset; a trailing
        path separator is optional
    :param ct: canteen name
    :param img_set: only the xml files whose names occur in this image set
        are taken into account
    :param cls_img_set: only the categories that occur in this category set
        are reported
    """
    # Local import: the file's import section is not visible from this block.
    import os

    # os.path.join inserts the separator only when the root lacks one, so a
    # root with a trailing "/" produces exactly the same path as before.
    all_trainval_set = os.path.join(
        food_dataset_root,
        "Food_{}/ImageSets/{}.txt".format(ct, img_set))
    all_xml_dir = os.path.join(
        food_dataset_root, "Food_{}/Annotations".format(ct))
    all_stats = dict(get_xml_from_file(all_trainval_set, all_xml_dir))
    print("-------processing {} {}-----------".format(ct, img_set))
    imgset_category = get_categories(ct + '_' + img_set)

    # open(..., 'w') does not create parent directories.
    os.makedirs("./statistics", exist_ok=True)
    report_path = "./statistics/{}_{}_{}_static.txt".format(
        ct, img_set, cls_img_set)
    with open(report_path, 'w') as f:
        # The first entry of the category list is skipped
        # (presumably a background class -- confirm against get_categories).
        for cls in get_categories(cls_img_set)[1:]:
            if imgset_category is not None and cls not in imgset_category:
                continue
            k = int(cls)
            if k in all_stats:
                fields = [str(k), str(all_stats[k]),
                          id2chn[str(k)], id2eng[str(k)]]
                f.write('\t'.join(fields) + '\n')
            else:
                # Keep an empty row for categories without statistics.
                f.write("\n")
def create_mtN_imageset(canteen, imgset, N: int):
    """Create the ``<imgset>mt<N>`` image set of a canteen.

    Reads the image ids listed in
    ``../data/Food/Food_<canteen>/ImageSets/<imgset>.txt``, parses the
    annotation xml of each one with ``parse_rec`` and keeps the images that
    contain at least one object whose name is in the category list
    ``<canteen>_train_mt<N>`` (``<canteen>_train`` when ``N`` is 0). The kept
    ids are written, one per line, to ``<imgset>mt<N>.txt`` in the same
    ImageSets directory.

    :param canteen: canteen name
    :param imgset: image set to filter; one of 'train', 'val' or 'test'
    :param N: suffix of the training category list used for filtering;
        0 selects the plain ``<canteen>_train`` list
    :type N: int
    :raises ValueError: if ``imgset`` is not one of the supported sets
    """
    # The previous `assert a != x or a != y or a != z` was always true and
    # asserts vanish under -O, so validate explicitly.
    if imgset not in ('train', 'val', 'test'):
        raise ValueError(
            "imgset must be 'train', 'val' or 'test', got {!r}".format(imgset))
    print("---processing {} mt {} {} ------".format(canteen, imgset, N))

    imgsets_path = "../data/Food/Food_{}/ImageSets".format(canteen)
    anno_path = "../data/Food/Food_{}/Annotations".format(canteen)

    # Only images with a category from the *training* list are kept,
    # whichever image set is being filtered. The list does not depend on the
    # image, so it is looked up once instead of once per object.
    # NOTE(review): the key uses "_train_mt<N>" while the sibling functions
    # use "_trainmt<N>" -- confirm which spelling get_categories expects.
    if N != 0:
        match_categories = get_categories(canteen + "_train_mt{}".format(N))
    else:
        match_categories = get_categories(canteen + "_train")

    with open(os.path.join(imgsets_path, "{}.txt".format(imgset)), 'r') as f:
        # strip() also drops '\r' and stray blanks; empty lines are ignored.
        image_ids = [line.strip() for line in f if line.strip()]

    content = []
    for image_id in image_ids:
        objects = parse_rec(os.path.join(anno_path, image_id + '.xml'))
        if any(obj['name'] in match_categories for obj in objects):
            # Keep the full id, even if it contains dots.
            content.append(image_id + '\n')

    print("saving {} sets:{}_mt{}".format(imgset, len(content), N))
    out_path = os.path.join(imgsets_path, "{}mt{}.txt".format(imgset, N))
    with open(out_path, 'w') as f:
        f.writelines(content)
def create_inner_imageset(ct, excl_train_mtN):
    """Create the "inner" image set of one canteen.

    inner is the inner set between train of excl{dataset} and trainval of
    {dataset}: every annotation xml of the canteen is run through a
    ``filter_xml`` built from the ``excl<ct>_train`` categories
    (``excl<ct>_trainmt<N>`` when ``excl_train_mtN`` is not 0), and the base
    names of the xml files the filter reserved are written, one per line, to
    ``inner.txt`` (``innermt<N>.txt`` when ``excl_train_mtN`` is not 0) in the
    canteen's ImageSets directory.

    :param ct: canteen name
    :param excl_train_mtN: N of the excl train-mt category list, 0 for the
        plain excl train list
    """
    print("------processing {}-----------".format(ct))
    imgsets_path = "../data/Food/Food_{}/ImageSets".format(ct)
    anno_path = "../data/Food/Food_{}/Annotations".format(ct)

    use_plain_train = excl_train_mtN == 0
    if use_plain_train:
        category_key = "excl" + ct + "_train"
    else:
        category_key = "excl" + ct + "_trainmt{}".format(excl_train_mtN)
    excl_class = get_categories(category_key)

    # The xml files are filtered through a callback; the extra state of the
    # callback lives in a filter_xml instance whose bound method is passed on.
    collector = filter_xml(excl_class)
    process_all_xml_files_from_dir(anno_path, collector.process)
    print(len(collector.reserver_xmls))
    kept_xmls = collector.reserver_xmls

    # Save the filtered result.
    print("saving inner mt {} sets:{}".format(excl_train_mtN, len(kept_xmls)))
    print(imgsets_path)
    target_name = (
        "inner.txt" if use_plain_train
        else "innermt{}.txt".format(excl_train_mtN))
    with open(os.path.join(imgsets_path, target_name), 'w') as out:
        for xml_path in kept_xmls:
            # Write the file name without directory and extension.
            stem = os.path.splitext(os.path.basename(xml_path))[0]
            out.write(stem + '\n')
def create_inner_imagesets():
    """Create the "inner" image sets of all canteens for several N values.

    inner is the inner set between train of excl{dataset} and trainval of
    {dataset}. For each canteen and each N in 0, 10, 30, 50, 100 the
    annotation xml files are filtered with the ``excl<ct>_train`` categories
    (``excl<ct>_trainmt<N>`` for N other than 0) and the base names of the
    collected xml files are written to ``inner.txt`` / ``innermt<N>.txt`` in
    the canteen's ImageSets directory.
    """
    canteens = ('Arts', 'Science', 'TechMixedVeg',
                'TechChicken', 'UTown', 'YIH')
    mt_values = (0, 10, 30, 50, 100)

    for ct in canteens:
        print("------processing {}-----------".format(ct))
        imgsets_path = "../data/Food/Food_{}/ImageSets".format(ct)
        anno_path = "../data/Food/Food_{}/Annotations".format(ct)

        for N in mt_values:
            plain_train = N == 0
            if plain_train:
                category_key = "excl" + ct + "_train"
            else:
                category_key = "excl" + ct + "_trainmt{}".format(N)
            excl_class = get_categories(category_key)

            # Three ways to filter the xml files through a callback:
            # 1. keep the callback's extra state in a class instance
            #    (a filter_xml object whose .process method is the callback);
            # 2. keep the callback's extra state in a closure (used here);
            # 3. a coroutine (not implemented).
            fx_clo = filter_clo(excl_class)
            process_all_xml_files_from_dir(anno_path, fx_clo)
            # __closure__ is a tuple of cell objects and each cell exposes its
            # value as cell_contents; the first cell is read as the collected
            # xml list (assumes filter_clo's first cell holds it -- confirm).
            first_cell = fx_clo.__closure__[0]
            filter_xmls = first_cell.cell_contents

            # Save the filtered result.
            print("saving inner mt {} sets:{}".format(N, len(filter_xmls)))
            print(imgsets_path)
            target_name = (
                "inner.txt" if plain_train else "innermt{}.txt".format(N))
            with open(os.path.join(imgsets_path, target_name), 'w') as out:
                for xml_path in filter_xmls:
                    # Write the file name without directory and extension.
                    stem = os.path.splitext(os.path.basename(xml_path))[0]
                    out.write(stem + '\n')
def create_few_inner_for_cross_domain(ct, imgset, mtN, fewN):
    """Split an image set of a canteen into few-shot train and val lists.

    Selecting few shot training samples from an image set of the canteen:
    the xml files listed in ``<imgset>.txt`` are passed through an
    ``Xml_in_few_sample_filter`` that is initialised with a zero counter for
    every excl category (first category entry excluded) and with ``fewN``.
    After ``clean_discard_by_dishes`` has been applied with the dishes of
    'innermt10val', the filter's reserved names are saved as the 'train' list
    and its discarded names as the 'val' list, in files named
    ``innermt10valfew<fewN>[mt<mtN>]<train|val>.txt``.

    :param ct: canteen name
    :param imgset: name of the image set (without '.txt') to select from
    :param mtN: N of mt which means the number of training sample is more
        than N; 0 selects the plain excl train categories
    :param fewN: the number of selected sample for each categories
    """
    print("------processing {}-selecting few inner--------".format(ct))
    imgsets_path = "../data/Food/Food_{}/ImageSets".format(ct)
    anno_path = "../data/Food/Food_{}/Annotations".format(ct)
    imset_path = os.path.join(imgsets_path, imgset+'.txt')

    plain_train = mtN == 0
    if plain_train:
        category_key = "excl" + ct + "_train"
    else:
        category_key = "excl" + ct + "_trainmt{}".format(mtN)
    excl_classes = get_categories(category_key)

    # One zero-initialised counter per category; the first entry is skipped.
    cls_sample_count = dict.fromkeys(excl_classes[1:], 0)

    few_filter = Xml_in_few_sample_filter(cls_sample_count, fewN)
    dishes = create_dishes(ct, 'innermt10val')
    process_xml_from_file(imset_path, anno_path, few_filter.process)

    def _write_split(names, split):
        """Write the given names, one per line, to the file of ``split``."""
        print("saving inner few{} mt {} {} sets:{}".format(
            fewN, mtN, split, len(names)))

        if plain_train:
            file_name = "innermt10valfew{}{}.txt".format(fewN, split)
        else:
            file_name = "innermt10valfew{}mt{}{}.txt".format(
                fewN, mtN, split)
        with open(os.path.join(imgsets_path, file_name), 'w') as out:
            for name in names:
                out.write(name + '\n')

    # Save the filtered result.
    few_filter.clean_discard_by_dishes(dishes)
    _write_split(few_filter.reserver_xmls, 'train')
    _write_split(few_filter.discard_xmls, 'val')