def _dataset(dataname): normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transformations = transforms.Compose([ transforms.Scale(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ]) rootpath = os.path.join('/data/dacheng/Datasets/', dataname) if dataname == 'NUSWIDE': dset_database = dataset.NUSWIDE('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.NUSWIDE('test_img.txt', 'test_label.txt', transformations) elif dataname == 'MirFlickr': dset_database = dataset.MirFlickr('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.MirFlickr('test_img.txt', 'test_label.txt', transformations) elif dataname == 'COCO': dset_database = dataset.COCO('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.COCO('test_img.txt', 'test_label.txt', transformations) elif dataname == 'CIFAR10': dset_database = dataset.CIFAR10('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.CIFAR10('test_img.txt', 'test_label.txt', transformations) elif dataname == 'MNIST': dset_database = dataset.MNIST(True, transformations) dset_test = dataset.MNIST(False, transformations) num_database, num_test = len(dset_database), len(dset_test) def load_label(filename, DATA_DIR): path = os.path.join(DATA_DIR, filename) fp = open(path, 'r') labels = [x.strip() for x in fp] fp.close() return torch.LongTensor(list(map(int, labels))) def DC_load_label(filename, DATA_DIR): path = os.path.join(DATA_DIR, filename) label = np.loadtxt(path, dtype=np.int64) return torch.LongTensor(label) def DC_load_label_MNIST(filename, root): _, labels = torch.load(os.path.join(root, filename)) return torch.LongTensor(labels) if dataname == 'CIFAR10': testlabels_ = load_label('test_label.txt', rootpath) databaselabels_ = load_label('train_label.txt', rootpath) testlabels = encoding_onehot(testlabels_) databaselabels = encoding_onehot(databaselabels_) elif dataname == 'MNIST': databaselabels_ = DC_load_label_MNIST( 'training.pt', root='/home/dacheng/PycharmProjects/ADSH_pytorch/data/processed/') testlabels_ = DC_load_label_MNIST( 'test.pt', root='/home/dacheng/PycharmProjects/ADSH_pytorch/data/processed/') testlabels = encoding_onehot(testlabels_) databaselabels = encoding_onehot(databaselabels_) else: databaselabels = DC_load_label('train_label.txt', rootpath) testlabels = DC_load_label('test_label.txt', rootpath) dsets = (dset_database, dset_test) nums = (num_database, num_test) labels = (databaselabels, testlabels) return nums, dsets, labels
def _dataset(dataname): normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transformations = transforms.Compose([ transforms.Scale(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize ]) rootpath = os.path.join('/data/dacheng/Datasets/', dataname) if dataname == 'NUSWIDE': dset_database = dataset.NUSWIDE('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.NUSWIDE('test_img.txt', 'test_label.txt', transformations) elif dataname == 'MirFlickr': dset_database = dataset.MirFlickr('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.MirFlickr('test_img.txt', 'test_label.txt', transformations) elif dataname == 'COCO': dset_database = dataset.COCO('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.COCO('test_img.txt', 'test_label.txt', transformations) elif dataname == 'CIFAR10': dset_database = dataset.CIFAR10('train_img.txt', 'train_label.txt', transformations) dset_test = dataset.CIFAR10('test_img.txt', 'test_label.txt', transformations) num_database, num_test = len(dset_database), len(dset_test) def load_label(filename, DATA_DIR): path = os.path.join(DATA_DIR, filename) fp = open(path, 'r') labels = [x.strip() for x in fp] fp.close() return torch.LongTensor(list(map(int, labels))) def DC_load_label(filename, DATA_DIR): path = os.path.join(DATA_DIR, filename) label = np.loadtxt(path, dtype=np.int64) return torch.LongTensor(label) def load_label2(root, train=True): base_folder = 'cifar-10-batches-py' train_list = [ ['data_batch_1', 'c99cafc152244af753f735de768cd75f'], ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'], ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'], ['data_batch_4', '634d18415352ddfa80567beed471001a'], ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'], ] test_list = [ ['test_batch', '40351d587109b95175f43aff81a1287e'], ] root = os.path.expanduser(root) train = train # training set or test set # now load the picked numpy arrays if train: train_data = [] train_labels = [] for fentry in train_list: f = fentry[0] file = os.path.join(root, base_folder, f) fo = open(file, 'rb') if sys.version_info[0] == 2: entry = pickle.load(fo) else: entry = pickle.load(fo, encoding='latin1') train_data.append(entry['data']) if 'labels' in entry: train_labels += entry['labels'] else: train_labels += entry['fine_labels'] fo.close() else: f = test_list[0][0] file = os.path.join(root, base_folder, f) fo = open(file, 'rb') if sys.version_info[0] == 2: entry = pickle.load(fo) else: entry = pickle.load(fo, encoding='latin1') test_data = entry['data'] if 'labels' in entry: test_labels = entry['labels'] else: test_labels = entry['fine_labels'] fo.close() if train: target = train_labels else: target = test_labels return torch.LongTensor(list(map(int, target))) if dataname == 'CIFAR10': testlabels_ = load_label('train_label.txt', rootpath) databaselabels_ = load_label('train_label.txt', rootpath) testlabels = encoding_onehot(testlabels_) databaselabels = encoding_onehot(databaselabels_) else: databaselabels = DC_load_label('train_label.txt', rootpath) testlabels = DC_load_label('test_label.txt', rootpath) # testlabels2 = load_label2('/home/dacheng/PycharmProjects/ADSH_pytorch/data', train=False) # databaselabels2 = load_label2('/home/dacheng/PycharmProjects/ADSH_pytorch/data', train=True) # testlabels = encoding_onehot(testlabels2) # databaselabels = encoding_onehot(databaselabels2) dsets = (dset_database, dset_test) nums = (num_database, num_test) labels = (databaselabels, testlabels) return nums, dsets, labels