def get_dataloader(instance_num=102400):
    datas = []
    labels = []
    positions = []
    for i in range(instance_num):
        inp_seq = np.zeros((MAXLEN, len(CHARSET)), dtype='int32')
        cur_len = MAXLEN
        out_seq = np.zeros((MAXLEN, ), dtype='int32')
        # module-level positional-query encoding, shared by every instance
        pos_encoding = pos_to_query
        for j in range(cur_len):
            # avoid generating the special characters '@' and '-'
            pos = np.random.randint(1, len(CHARSET) - 1)
            inp_seq[j][pos] = 1
            out_seq[cur_len - 1 - j] = pos
        datas.append(inp_seq)
        labels.append(out_seq)
        positions.append(pos_encoding)
    reverse_dataset = ArrayDataset(datas, labels, positions)
    random_sampler = RandomSampler(reverse_dataset, batch_size)
    dataloader = DataLoader(reverse_dataset, random_sampler)
    return dataloader
def build_dataloader(batch_size, dataset_dir, cfg):
    if cfg.dataset == "VOC2012":
        train_dataset = dataset.PascalVOC(
            dataset_dir, cfg.data_type, order=["image", "mask"])
    elif cfg.dataset == "Cityscapes":
        train_dataset = dataset.Cityscapes(
            dataset_dir, "train", mode='gtFine', order=["image", "mask"])
    else:
        raise ValueError("Unsupported dataset {}".format(cfg.dataset))
    train_sampler = Infinite(
        RandomSampler(train_dataset, batch_size, drop_last=True))
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose(
            transforms=[
                T.RandomHorizontalFlip(0.5),
                T.RandomResize(scale_range=(0.5, 2)),
                T.RandomCrop(
                    output_size=(cfg.img_height, cfg.img_width),
                    padding_value=[0, 0, 0],
                    padding_maskvalue=255,
                ),
                T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
                T.ToMode(),
            ],
            order=["image", "mask"],
        ),
        num_workers=2,
    )
    return train_dataloader
def get_dataloader():
    instance_num = 102400
    datas = []
    labels = []
    masks = []
    for i in range(instance_num):
        cur_len = np.random.randint(MINLEN, MAXLEN + 1)
        inp_seq = np.zeros((MAXLEN + 1, len(CHARSET)), dtype='int32')
        cur_len = MAXLEN  # overrides the random length above with a fixed length
        mask = np.zeros((MAXLEN + 1, ), dtype='int32')
        out_seq = np.zeros((MAXLEN + 1, ), dtype='int32')
        # terminator symbol at the end of both the input and the target
        inp_seq[cur_len][len(CHARSET) - 1] = 1
        out_seq[cur_len] = len(CHARSET) - 1
        mask[:cur_len + 1] = 1
        for j in range(cur_len):
            # avoid generating the special characters '@' and '-'
            pos = np.random.randint(1, len(CHARSET) - 1)
            inp_seq[j][pos] = 1
            out_seq[cur_len - 1 - j] = pos
        datas.append(inp_seq)
        labels.append(out_seq)
        masks.append(mask)
    reverse_dataset = ArrayDataset(datas, labels, masks)
    random_sampler = RandomSampler(reverse_dataset, batch_size)
    dataloader = DataLoader(reverse_dataset, random_sampler)
    return dataloader
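# A minimal consumption sketch for the get_dataloader() above, assuming the
# module-level globals MAXLEN, MINLEN, CHARSET and batch_size are defined as in
# the original script, and that iterating a MegEngine DataLoader over an
# ArrayDataset yields one batched NumPy array per dataset field.
dataloader = get_dataloader()
for inp_seq, out_seq, mask in dataloader:
    # one-hot inputs, reversed target indices, and a per-position validity mask
    print(inp_seq.shape, out_seq.shape, mask.shape)
    # expected roughly: (batch_size, MAXLEN + 1, len(CHARSET)),
    #                   (batch_size, MAXLEN + 1), (batch_size, MAXLEN + 1)
    break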
def __init__(self, input_dimension, num_points, batch_size=16, istrain=True):
    """
    Generate the binary-classification dataset shown in Figure 1; it contains num_points samples.
    """
    means = [0.1 * n for n in range(input_dimension)]
    scales = [1 for n in range(input_dimension)]
    # the evaluation split shifts each dimension's mean and scale slightly
    deviation = [0.05 * (-1 if n % 2 == 0 else 1) for n in range(input_dimension)]
    sd = [0.1 * (-1 if n % 2 == 0 else 1) for n in range(input_dimension)]
    alls = []
    for i in range(input_dimension):
        m, s = means[i], scales[i]
        if not istrain:
            m += deviation[i]
            s += sd[i]
        cur = np.random.normal(m, s, num_points).astype(np.float32).reshape(-1, 1)
        alls.append(cur)
    self.data = np.concatenate(alls, axis=1)
    super().__init__(self.data)
    self.random_sampler = RandomSampler(dataset=self, batch_size=batch_size, seed=1024)
    self.dataloader = DataLoader(dataset=self, sampler=self.random_sampler)
def get_loader(dataset, cfg, mode='train'):
    assert mode in ('train', 'eval')
    if mode == 'train':
        sampler = RandomSampler(dataset, batch_size=cfg.data.samples_per_gpu,
                                drop_last=True, seed=0)
        loader = DataLoader(dataset, sampler, num_workers=cfg.data.workers_per_gpu)
    else:
        samples_per_gpu = cfg.data.get('eval_samples_per_gpu',
                                       cfg.data.samples_per_gpu)
        workers_per_gpu = cfg.data.get('eval_workers_per_gpu',
                                       cfg.data.workers_per_gpu)
        if cfg.evaluation.multi_process is True:
            sampler = SequentialSampler(dataset, batch_size=samples_per_gpu,
                                        drop_last=False)
        else:
            sampler = SequentialSampler(dataset, batch_size=samples_per_gpu,
                                        drop_last=False, world_size=1, rank=0)
        loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
def build_dataloader():
    train_dataset = MNIST(root=gettempdir(), train=True, download=True)
    dataloader = DataLoader(
        train_dataset,
        transform=Compose([
            Normalize(mean=0.1307 * 255, std=0.3081 * 255),
            Pad(2),
            ToMode("CHW"),
        ]),
        sampler=RandomSampler(dataset=train_dataset, batch_size=64),
    )
    return dataloader
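# A minimal usage sketch for the MNIST builder above, assuming the DataLoader
# yields (image, label) NumPy batches and that Pad(2) turns the 28x28 inputs
# into 32x32 before ToMode("CHW") moves the channel axis first.
dataloader = build_dataloader()
for images, labels in dataloader:
    print(images.shape, labels.shape)  # roughly (64, 1, 32, 32) and (64,)
    break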
def __init__(self, num_points, batch_size=16):
    """
    Generate the binary-classification dataset shown in Figure 1; it contains num_points samples.
    """
    # Initialize a NumPy array of shape (num_points, 2).
    # Each row is a data point (x, y) whose coordinates both fall in [-1, 1].
    # np.random.seed(2020)
    self.data = np.random.rand(num_points, 2).astype(np.float32) * 2 - 1
    # Build the labels: a row (x, y) gets label 1 if x * y < 0, otherwise 0.
    self.label = np.zeros(num_points, dtype=np.int32)
    for i in range(num_points):
        self.label[i] = 1 if np.prod(self.data[i]) < 0 else 0
    super().__init__(self.data, self.label)
    self.random_sampler = RandomSampler(dataset=self, batch_size=batch_size, seed=1024)
    self.dataloader = DataLoader(dataset=self, sampler=self.random_sampler)
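# Usage sketch for the dataset class whose __init__ is shown above; the class
# name XORDataset is hypothetical, since the original snippet only shows __init__.
dataset = XORDataset(num_points=50000, batch_size=16)
for data, label in dataset.dataloader:
    print(data.shape, label.shape)  # expected (16, 2) float32 and (16,) int32
    break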
def build_sampler(train_dataset, batch_size, aspect_grouping=[1]):
    def _compute_aspect_ratios(dataset):
        aspect_ratios = []
        for i in range(len(dataset)):
            info = dataset.get_img_info(i)
            aspect_ratios.append(info["height"] / info["width"])
        return aspect_ratios

    def _quantize(x, bins):
        return list(map(lambda y: bisect.bisect_right(sorted(bins), y), x))

    if len(aspect_grouping) == 0:
        return Infinite(RandomSampler(train_dataset, batch_size, drop_last=True))

    aspect_ratios = _compute_aspect_ratios(train_dataset)
    group_ids = _quantize(aspect_ratios, aspect_grouping)
    return Infinite(GroupedRandomSampler(train_dataset, batch_size, group_ids))
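# Sketch of wiring the sampler above into a DataLoader, assuming train_dataset
# and batch_size are defined as in the surrounding training script. Because the
# sampler is wrapped in Infinite, the stream never ends, so batches are usually
# pulled with next() on an iterator rather than a bounded for-loop.
train_sampler = build_sampler(train_dataset, batch_size, aspect_grouping=[1])
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, num_workers=2)
data_iter = iter(train_dataloader)
batch = next(data_iter)  # field layout depends on the underlying detection dataset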
@trace
def train_func(data, label, net=None, optimizer=None):
    net.train()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
    return pred, loss

train_dataset = u_data("./data/train", order=["image", "mask"])
dataloader = DataLoader(train_dataset,
                        transform=Compose([ToMode('CHW')]),
                        sampler=RandomSampler(dataset=train_dataset, batch_size=4, drop_last=True))
unet = Unet(1, 4)
optimizer = optim.SGD(unet.parameters(), lr=0.05)

trace.enabled = True
total_epochs = 50
loss_src = 100000000
for epoch in range(total_epochs):
    total_loss = 0
    correct = 0
    total = 0
    sta = time.time()
from megengine.data.dataset import MNIST
from megengine.data.transform import RandomResizedCrop, Normalize, ToMode, Pad, Compose
import megengine.optimizer as optim

mge.set_log_file('log.txt')
logger = mge.get_logger(__name__)
#logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")

# dataset
root_dir = '/data/.cache/dataset/MNIST'
mnist_train_dataset = MNIST(root=root_dir, train=True, download=False)
mnist_test_dataset = MNIST(root=root_dir, train=False, download=False)

random_sampler = RandomSampler(dataset=mnist_train_dataset, batch_size=256)
sequential_sampler = SequentialSampler(dataset=mnist_test_dataset, batch_size=256)

mnist_train_dataloader = DataLoader(
    dataset=mnist_train_dataset,
    sampler=random_sampler,
    transform=Compose([
        RandomResizedCrop(output_size=28),
        # mean and std below are the MNIST statistics; pixel values range over 0~255
        #Normalize(mean=0.1307*255, std=0.3081*255),
        #Pad(2),
        # 'CHW' converts images from (height, width, channel) to (channel, height, width)
        #ToMode('CHW'),
    ]))
mnist_test_dataloader = DataLoader(