def create_category_label():
    '''
    Create the category entry list and the per-sample (0-based) category label
    for the Category_and_Attribute data.
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    cat_entry_list = io.load_str_list(ca_root + 'Anno/list_category_cloth.txt')[2::]
    cat_list = io.load_str_list(ca_root + 'Anno/list_category_img.txt')[2::]

    # create category entry
    cat_entry = []
    for cat_str in cat_entry_list:
        cat_name = ' '.join(cat_str.split()[0:-1])
        cat_type = int(cat_str.split()[-1])
        cat_entry.append({'entry': cat_name, 'type': cat_type})
    io.save_json(cat_entry, design_root + 'Label/cat_entry.json')
    print('create category entry')

    # create category label
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img')::]: s_id
        for s_id, s in samples.iteritems()
    }
    cat_label = {}
    for idx, s in enumerate(cat_list):
        s = s.split()
        s_id = img2id[s[0]]
        cat = int(s[1]) - 1
        cat_label[s_id] = cat
    io.save_data(cat_label, design_root + 'Label/ca_cat_label.pkl')
    print('create category label')
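# Usage sketch (added for illustration, not part of the original pipeline):
# read back the files written by create_category_label() and print the most
# frequent categories. It assumes the same `io` helpers, `design_root` layout
# and Python 2 environment used by the functions in this file.
def print_category_histogram(n_top=10):
    from collections import Counter
    cat_entry = io.load_json(design_root + 'Label/cat_entry.json')
    cat_label = io.load_data(design_root + 'Label/ca_cat_label.pkl')
    counter = Counter(cat_label.values())
    for cat_idx, count in counter.most_common(n_top):
        print('%s: %d (%.1f%%)' % (cat_entry[cat_idx]['entry'], count,
                                   100. * count / len(cat_label)))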
def create_attribute_label():
    '''
    Create attribute labels for DF_Pose images by transferring the In-shop
    item-level attribute annotations, then drop attribute entries with no
    positive sample.
    '''
    img_split = io.load_json('datasets/DF_Pose/Label/image_split_dfm_new.json')
    id_list = img_split['train'] + img_split['test']
    attr_entry = io.load_str_list(
        'datasets/DeepFashion/In-shop/Anno/list_attr_cloth.txt')[2:]
    attr_anno = io.load_str_list(
        'datasets/DeepFashion/In-shop/Anno/list_attr_items.txt')
    attr_anno = attr_anno[2:]
    attr_anno = [l.replace('-1', '0').split() for l in attr_anno]
    attr_anno = {l[0]: np.array(l[1:], dtype=np.int) for l in attr_anno}

    label = {}
    for sid in id_list:
        s = sid.index('id') + 2
        e = s + 8
        sid_ori = 'id_' + sid[s:e]
        label[sid] = attr_anno[sid_ori]

    # remove attribute entries with no positive sample
    label_mat = np.array(label.values())
    valid_idx = label_mat.sum(axis=0) > 0
    print('%d valid attribute entries' % (valid_idx.sum()))
    label = {k: v[valid_idx] for k, v in label.iteritems()}
    attr_entry = [e for i, e in enumerate(attr_entry) if valid_idx[i]]

    attr_label = {'label': label, 'entry': attr_entry}
    io.save_data(attr_label, 'datasets/DF_Pose/Label/attr_label.pkl')
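# Usage sketch (illustrative, hypothetical helper): list the positive attribute
# entries of one DF_Pose image id using the attr_label.pkl file produced above.
# The returned entries are the raw lines from list_attr_cloth.txt.
def get_positive_attributes(sid):
    attr_label = io.load_data('datasets/DF_Pose/Label/attr_label.pkl')
    label = attr_label['label'][sid]
    entry = attr_label['entry']
    return [entry[i] for i in range(len(entry)) if label[i] == 1]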
def create_attr_label():
    '''
    Create the image-level attribute label file for the Category_and_Attribute
    benchmark and print the most frequent attributes of each attribute type.
    '''
    # alternative In-shop configuration (463 attributes):
    # attr_list = io.load_str_list(root + 'Anno/list_attr_cloth.txt')[2::]
    # attr_anno = io.load_str_list(root + 'Anno/list_attr_items.txt')[2::]
    # fn_out = root + 'Label/attribute_inshop.json'
    # num_attr = 463
    # n_top = 5

    attr_list = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_cloth.txt')[2::]
    attr_list = [' '.join(a.split()[0:-1]) for a in attr_list]
    attr_anno = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_img.txt')[2::]
    fn_out = root + 'Label/attribute_ca.json'
    num_attr = 1000
    n_top = 5

    # create label data
    if not os.path.isfile(fn_out):
        attr_data = {}
        for line in attr_anno:
            line = line.split()
            item_id = line[0]
            label = [int(c) for c in line[1::]]
            assert len(label) == num_attr
            attr_data[item_id] = label
        io.save_json(attr_data, fn_out)
    else:
        attr_data = io.load_json(fn_out)
    num_sample = len(attr_data)

    # most frequent attributes in each attribute type
    attr_list_ref = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_cloth.txt')[2::]
    attr_type = {' '.join(a.split()[0:-1]): a.split()[-1] for a in attr_list_ref}
    attr_mat = np.array(attr_data.values(), dtype=float)
    attr_count = np.where(attr_mat > 0, 1, 0).sum(axis=0)

    attr_count_type = {}
    for i, attr_name in enumerate(attr_list):
        t = attr_type[attr_name] if attr_name in attr_type else '-1'
        if t not in attr_count_type:
            attr_count_type[t] = []
        attr_count_type[t].append((attr_name, attr_count[i]))

    for t in {'1', '2', '3', '4', '5', '-1'}:
        if t not in attr_count_type:
            continue
        attr_count_list = attr_count_type[t]
        attr_count_list.sort(key=lambda x: x[1], reverse=True)
        print('attribute type: %s' % t)
        for attr_name, count in attr_count_list[0:n_top]:
            print('%s: %d (%.1f %%)' % (attr_name, count, 100. * count / num_sample))
        print('\n')
def create_attr_entry():
    '''
    Create the attribute entry list, containing the original 1000 attributes
    used in the Category_and_Attribute benchmark.
    '''
    print('loading data...')
    attr_entry_list = io.load_str_list(ca_root + 'Anno/list_attr_cloth.txt')[2::]
    attr_label = io.load_data(design_root + 'Label/ca_attr_label.pkl')
    split = io.load_json(design_root + 'Split/ca_split.json')
    train_ids = set(split['train'])
    attr_mat = np.array(
        [v for k, v in attr_label.iteritems() if k in train_ids],
        dtype=np.float32)

    print('computing positive rates')
    num_sample = len(train_ids)
    pos_rate = attr_mat.sum(axis=0) / num_sample

    attr_entry = []
    for idx, s in enumerate(attr_entry_list):
        s = s.split()
        attr_name = ' '.join(s[0:-1])
        attr_type = int(s[-1])
        attr_entry.append({
            'entry': attr_name,
            'type': attr_type,
            'pos_rate': pos_rate[idx]
        })

    io.save_json(attr_entry, design_root + 'Label/attr_entry.json')
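# Usage sketch (illustrative): filter the attribute entries written above by
# their training-set positive rate. The threshold value is an assumption for
# demonstration, not a setting used elsewhere in this file.
def select_frequent_attributes(min_pos_rate=0.01):
    attr_entry = io.load_json(design_root + 'Label/attr_entry.json')
    selected = [a for a in attr_entry if a['pos_rate'] >= min_pos_rate]
    print('%d / %d attribute entries kept' % (len(selected), len(attr_entry)))
    return selected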
def create_split():
    '''
    Create the split following the original partition.
    '''
    split_list = io.load_str_list(ca_root + 'Eval/list_eval_partition.txt')[2:]
    split = {'train': [], 'val': [], 'test': []}
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img')::]: s_id
        for s_id, s in samples.iteritems()
    }

    for s in split_list:
        img_path, status = s.split()
        s_id = img2id[img_path]
        split[status].append(s_id)

    io.mkdir_if_missing(design_root + 'Split')
    io.save_json(split, design_root + 'Split/ca_split.json')
    print('create split')
    for status in ['train', 'val', 'test']:
        print('%s: %d' % (status, len(split[status])))

    split_trainval = {
        'train': split['train'] + split['val'],
        'test': split['test']
    }
    io.save_json(split_trainval, design_root + 'Split/ca_split_trainval.json')
def create_synthesis_to_CA_index():
    '''
    Create an index map A such that img_syn[i] = img_ca[A[i]].
    '''
    import scipy.io
    syn_name_list = io.load_str_list(
        'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/name_list.txt')
    samples = io.load_json(
        'datasets/DeepFashion/Fashion_design/Label/ca_samples.json')

    ca_name2idx = {
        s['img_path_org'][s['img_path_org'].find('img/')::]: int(s_id[3::])
        for s_id, s in samples.iteritems()
    }
    ca_name2sz = {}
    for i, s in enumerate(samples.values()):
        img = image.imread(s['img_path_org'])
        h, w = img.shape[0:2]
        ca_name2sz[s['img_path_org'][s['img_path_org'].find('img/')::]] = (w, h)
        print('load ca image size: %d/%d' % (i, len(samples)))

    syn_idx_list = [ca_name2idx[name] for name in syn_name_list]
    syn_org_size_list = [ca_name2sz[name] for name in syn_name_list]
    data_out = {
        'syn2ca_index': syn_idx_list,
        'syn2ca_width': [w for w, _ in syn_org_size_list],
        'syn2ca_height': [h for _, h in syn_org_size_list]
    }
    fn_out = 'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/index_to_Category_and_Attribute.mat'
    scipy.io.savemat(fn_out, data_out)
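# Usage sketch (illustrative): load the .mat index map saved above and map a
# synthesis image index back to its Category_and_Attribute sample index and
# original image size. scipy.io.savemat stores the lists as 1xN arrays, hence
# the [0, syn_idx] indexing.
def lookup_ca_index(syn_idx):
    import scipy.io
    fn = 'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/index_to_Category_and_Attribute.mat'
    data = scipy.io.loadmat(fn)
    ca_idx = int(data['syn2ca_index'][0, syn_idx])
    width = int(data['syn2ca_width'][0, syn_idx])
    height = int(data['syn2ca_height'][0, syn_idx])
    return ca_idx, (width, height)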
def create_split():
    '''
    Create the split file, following the partition used in the VITON paper.
    '''
    train_pairs = io.load_str_list(zalando_root + 'Source/viton_train_pairs.txt')
    test_pairs = io.load_str_list(zalando_root + 'Source/viton_test_pairs.txt')
    split = {}
    for subset, pairs in [('train', train_pairs), ('test', test_pairs)]:
        id_list = [p.split()[0][0:6] for p in pairs]
        split[subset] = id_list
    split['debug'] = split['train'][0:32]
    io.mkdir_if_missing(zalando_root + 'Split')
    io.save_json(split, zalando_root + 'Split/zalando_split.json')
def create_gan_split():
    '''
    Create splits for GAN training:
    - ca_gan: contains all frontal images
    - ca_gan_upper: contains all frontal, upper-body images
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    split = io.load_json(design_root + 'Split/ca_split_trainval.json')
    # use the same image set as in Zhu Shizhan's ICCV17 FashionGAN paper
    img_list = io.load_str_list(
        'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/name_list.txt')
    img_set = set(img_list)

    split_gan = {'train': [], 'test': []}
    split_gan_upper = {'train': [], 'test': []}
    for set_name in ['train', 'test']:
        for s_id in split[set_name]:
            s = samples[s_id]
            img_name = s['img_path_org']
            img_name = img_name[img_name.find('img/')::]
            if img_name in img_set:
                split_gan[set_name].append(s_id)
                if s['cloth_type'] == 1:
                    split_gan_upper[set_name].append(s_id)

    print('create split "split_gan"')
    print('train: %d, test: %d, total: %d' %
          (len(split_gan['train']), len(split_gan['test']),
           len(split_gan['train']) + len(split_gan['test'])))
    print('create split "split_gan_upper"')
    print('train: %d, test: %d, total: %d' %
          (len(split_gan_upper['train']), len(split_gan_upper['test']),
           len(split_gan_upper['train']) + len(split_gan_upper['test'])))

    io.save_json(split_gan, design_root + 'Split/ca_gan_split_trainval.json')
    io.save_json(split_gan_upper, design_root + 'Split/ca_gan_split_trainval_upper.json')
def create_sample_index_and_label():
    '''
    Create sample index and labels for the Category_and_Attribute data:
    - sample index
    - landmark label
    - bbox label
    - attribute label
    '''
    # config
    dir_label = design_root + 'Label/'

    # create sample index and landmark label
    landmark_list = io.load_str_list(ca_root + 'Anno/list_landmarks.txt')[2::]
    img_root_org = ca_root + 'Img/'
    samples = {}
    landmark_label = {}
    for idx, s in enumerate(landmark_list):
        img_id = 'ca_' + str(idx)
        s = s.split()
        img_path_org = os.path.join(img_root_org, s[0])
        # 1: upper-body, 2: lower-body, 3: full-body
        cloth_type = int(s[1])
        pose_type = -1
        lm_str = s[2::]
        if cloth_type == 1:
            assert len(lm_str) == 18
        elif cloth_type == 2:
            assert len(lm_str) == 12
        elif cloth_type == 3:
            assert len(lm_str) == 24
        # lm is a list: [(x_i, y_i, v_i)]
        lm = [(float(lm_str[i + 1]), float(lm_str[i + 2]), int(lm_str[i]))
              for i in range(0, len(lm_str), 3)]
        samples[img_id] = {
            'img_id': img_id,
            'cloth_type': cloth_type,
            'pose_type': pose_type,
            'img_path_org': img_path_org
        }
        landmark_label[img_id] = lm

    io.mkdir_if_missing(dir_label)
    io.save_json(samples, os.path.join(dir_label, 'ca_samples.json'))
    io.save_data(landmark_label, os.path.join(dir_label, 'ca_landmark_label.pkl'))
    print('create sample index (%d samples)' % len(samples))
    print('create landmark label')

    img2id = {
        s['img_path_org'][s['img_path_org'].find('img')::]: s_id
        for s_id, s in samples.iteritems()
    }

    # create bbox label
    bbox_list = io.load_str_list(ca_root + 'Anno/list_bbox.txt')[2::]
    bbox_label = {}
    for s in bbox_list:
        s = s.split()
        assert len(s) == 5
        s_id = img2id[s[0]]
        bbox = [float(x) for x in s[1::]]
        bbox_label[s_id] = bbox
    io.save_data(bbox_label, os.path.join(dir_label, 'ca_bbox_label.pkl'))
    print('create bbox label')

    # create attribute label
    attr_list = io.load_str_list(ca_root + 'Anno/list_attr_img.txt')[2::]
    attr_label = {}
    for idx, s in enumerate(attr_list):
        s = s.split()
        s_id = img2id[s[0]]
        att = [1 if c == '1' else 0 for c in s[1::]]
        assert len(att) == 1000
        attr_label[s_id] = att
        print('\rcreating attribute label %d / %d' % (idx, len(attr_list)), end='')
    io.save_data(attr_label, os.path.join(dir_label, 'ca_attr_label.pkl'))
    print('\ncreate attribute label')
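# Usage sketch (illustrative, hypothetical helper): load the index and label
# files written by create_sample_index_and_label() and gather everything known
# about one sample id. Assumes the same `io` helpers and `design_root` layout
# as above; the per-label comments describe the assumed annotation formats.
def load_ca_sample(s_id):
    dir_label = design_root + 'Label/'
    samples = io.load_json(dir_label + 'ca_samples.json')
    landmark_label = io.load_data(dir_label + 'ca_landmark_label.pkl')
    bbox_label = io.load_data(dir_label + 'ca_bbox_label.pkl')
    attr_label = io.load_data(dir_label + 'ca_attr_label.pkl')
    return {
        'sample': samples[s_id],
        'landmarks': landmark_label[s_id],  # list of (x, y, visibility)
        'bbox': bbox_label[s_id],           # 4 box coordinates from list_bbox.txt
        'attributes': attr_label[s_id],     # 1000-dim binary vector
    }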
def create_split():
    '''
    Split the dataset into train/test sets whose items do NOT overlap.
    In the original split, the train/test sets have equal size; here we
    create our own split with a larger train set.
    '''
    # config
    use_original = False
    train_rate = 0.8

    # load samples
    samples = io.load_json(design_root + 'Label/inshop_samples.json')

    if use_original:
        # load split file
        split_list = io.load_str_list(inshop_root + 'Eval/list_eval_partition.txt')[2::]
        item2split = {}
        for line in split_list:
            line = line.split()
            item_id = line[1]
            status = line[2]
            if status == 'train':
                item2split[item_id] = 'train'
            else:
                item2split[item_id] = 'test'
    else:
        category2item = {}
        for s in samples.values():
            cat = s['category']
            if cat not in category2item:
                category2item[cat] = []
            category2item[cat].append(s['item_id'])

        item2split = {}
        np.random.seed(0)
        cat_list = category2item.keys()
        cat_list.sort()
        for cat in cat_list:
            item_list = list(set(category2item[cat]))
            item_list.sort()
            np.random.shuffle(item_list)
            split_point = int(len(item_list) * train_rate)
            for item_id in item_list[0:split_point]:
                item2split[item_id] = 'train'
            for item_id in item_list[split_point::]:
                item2split[item_id] = 'test'

    # check overlap
    train_set = set([item_id for item_id, s in item2split.iteritems() if s == 'train'])
    test_set = set([item_id for item_id, s in item2split.iteritems() if s == 'test'])
    assert not set.intersection(train_set, test_set)

    # create split
    split = {'train': [], 'test': []}
    for s_id, s in samples.iteritems():
        split[item2split[s['item_id']]].append(s_id)

    print('train set: %d items, %d images' %
          (item2split.values().count('train'), len(split['train'])))
    print('test set: %d items, %d images' %
          (item2split.values().count('test'), len(split['test'])))

    fn_out = design_root + 'Split/inshop_split.json'
    io.save_json(split, fn_out)
def create_sample_index_and_label():
    '''
    Create sample index and labels for the In-shop dataset:
    - sample index
    - landmark label
    - bbox label
    '''
    # config
    dir_label = design_root + 'Label/'

    # create sample index and landmark label
    landmark_list = io.load_str_list(inshop_root + 'Anno/list_landmarks_inshop.txt')[2::]
    img_root_org = inshop_root + 'Img/'
    samples = {}
    landmark_label = {}
    for idx, s in enumerate(landmark_list):
        img_id = 'inshop_' + str(idx)
        s = s.split()
        img_path_org = os.path.join(img_root_org, s[0])
        item_id = img_path_org.split('/')[-2]
        category = img_path_org.split('/')[-3]
        # 1: upper-body, 2: lower-body, 3: full-body
        cloth_type = int(s[1])
        # 1: normal, 2: medium, 3: large, 4: medium zoom-in, 5: large zoom-in, 6: flat (no person)
        pose_type = int(s[2])
        lm_str = s[3::]
        if cloth_type == 1:
            assert len(lm_str) == 18
        elif cloth_type == 2:
            assert len(lm_str) == 12
        elif cloth_type == 3:
            assert len(lm_str) == 24
        # lm is a list: [(x_i, y_i, v_i)]
        lm = [(float(lm_str[i + 1]), float(lm_str[i + 2]), int(lm_str[i]))
              for i in range(0, len(lm_str), 3)]
        samples[img_id] = {
            'img_id': img_id,
            'item_id': item_id,
            'category': category,
            'cloth_type': cloth_type,
            'pose_type': pose_type,
            'img_path_org': img_path_org
        }
        landmark_label[img_id] = lm

    io.mkdir_if_missing(dir_label)
    io.save_json(samples, os.path.join(dir_label, 'inshop_samples.json'))
    io.save_data(landmark_label, os.path.join(dir_label, 'inshop_landmark_label.pkl'))
    print('create sample index (%d samples)' % len(samples))
    print('create landmark label')

    img2id = {
        s['img_path_org'][s['img_path_org'].find('img')::]: s_id
        for s_id, s in samples.iteritems()
    }

    # create bbox label
    bbox_list = io.load_str_list(inshop_root + 'Anno/list_bbox_inshop.txt')[2::]
    bbox_label = {}
    for s in bbox_list:
        s = s.split()
        assert len(s) == 7
        s_id = img2id[s[0]]
        bbox = [float(x) for x in s[3::]]
        bbox_label[s_id] = bbox
    io.save_data(bbox_label, os.path.join(dir_label, 'inshop_bbox_label.pkl'))
    print('create bbox label')
def create_index():
    '''
    Create a sample index for the In-shop images, optionally copying them into
    a flattened directory structure.
    '''
    high_res = True
    copy_img = False

    bbox_list = io.load_str_list(root + '/Anno/list_bbox_inshop.txt')[2::]
    samples = {}

    if high_res:
        org_img_root = root + 'Img/img_highres/'
        new_img_root = root + 'Img/img1_highres/'
        fn_out = root + 'Label/samples_highres.json'
    else:
        org_img_root = root + 'Img/img/'
        new_img_root = root + 'Img/img1/'
        fn_out = root + 'Label/samples.json'
    io.mkdir_if_missing(new_img_root)

    num_fail = 0
    for idx, s in enumerate(bbox_list):
        img_id = str(idx)
        s = s.split()
        assert len(s) == 7

        org_path = s[0]
        org_path_split = org_path.split('/')[1:]
        org_path = '/'.join(org_path_split)
        gender = org_path_split[0]
        category = org_path_split[1]
        item_id = org_path_split[2]
        img_path = '/'.join(org_path_split[2:4])
        pose = org_path_split[3].split('_')[2][0:-4]
        assert pose in {'front', 'back', 'side', 'full', 'flat', 'additional'}

        fn_src = org_img_root + '/' + org_path
        fn_tar = new_img_root + '/' + img_path
        if not os.path.isfile(fn_src):
            num_fail += 1
            print(fn_src)
            continue

        samples[img_id] = {
            'img_id': img_id,
            'item_id': item_id,
            'img_path': img_path,
            'gender': gender,
            'category': category,
            'pose': pose,
            'org_path': org_path,
        }

        if copy_img:
            io.mkdir_if_missing(new_img_root + item_id)
            shutil.copyfile(fn_src, fn_tar)
            # print('%s: %s => %s' % (img_id, fn_src, fn_tar))
            # print('%s' % img_id)

    io.save_json(samples, fn_out)
    print('\n')
    print('save sample index to %s' % fn_out)
    print('%d samples not found!' % num_fail)
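# The functions in this file rely on a small `io` utility module
# (load_str_list, load_json, save_json, load_data, save_data, mkdir_if_missing)
# that is not shown here. The standalone sketch below illustrates the assumed
# interface; the project's actual helpers may differ in details such as pickle
# protocol or JSON formatting.
import json
import os
import cPickle as pickle  # Python 2, matching the iteritems() usage above; use `import pickle` on Python 3


def load_str_list(fn):
    # read a text file as a list of lines with trailing newlines stripped
    with open(fn, 'r') as f:
        return [line.rstrip('\n') for line in f]


def load_json(fn):
    with open(fn, 'r') as f:
        return json.load(f)


def save_json(obj, fn):
    with open(fn, 'w') as f:
        json.dump(obj, f)


def load_data(fn):
    # load a pickled object
    with open(fn, 'rb') as f:
        return pickle.load(f)


def save_data(obj, fn):
    # pickle an object to disk
    with open(fn, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)


def mkdir_if_missing(path):
    if not os.path.exists(path):
        os.makedirs(path)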