def iters(cls, batch_size: int = 32, device: int = 0, root: str = '.data', vectors: Tensor = None, **kwargs) -> Tuple[Iterator, Iterator, Iterator]:
    """Build bucket iterators over the train, validation and test splits.

    Creates a default text ``Field`` and a non-sequential label ``Field``,
    loads the three splits through ``cls.splits``, builds both vocabularies
    from the training split, and wraps all three splits in bucket iterators.

    Args:
        batch_size: Batch size forwarded to ``BucketIterator.splits``.
        device: Device argument forwarded to ``BucketIterator.splits``.
        root: Dataset root directory forwarded to ``cls.splits``.
        vectors: Forwarded to ``text.build_vocab`` as ``vectors``; ``None``
            passes no vectors.
        **kwargs: Extra keyword arguments forwarded to ``cls.splits``.

    Returns:
        The result of ``BucketIterator.splits`` for ``(train, valid, test)``,
        i.e. the three iterators promised by the return annotation.
    """
    text = Field()
    label = Field(sequential=False)

    train, valid, test = cls.splits(text, label, root=root, **kwargs)

    # Vocabularies are built from the training split only.
    text.build_vocab(train, vectors=vectors)
    label.build_vocab(train)

    # The validation split must be included: previously only (train, test)
    # was passed, so `valid` was loaded and then silently dropped, and the
    # result did not match the declared three-iterator return type.
    return BucketIterator.splits(
        (train, valid, test), batch_size=batch_size, device=device)
# NOTE(review): this fragment was recovered from a whitespace-collapsed source;
# the line breaks and nesting below are reconstructed. `parser` and `device`
# are defined before this point.
params = parser.parse_args()

# Collect every parsed argument as a (name, value) pair, add the device under
# the key 'Device', and hand the list to print_kv_box with its title.
kvs = [(k, v) for k, v in vars(params).items()]
kvs.append(('Device', device))
print_kv_box('Current Configuration', kvs)

if params.mode == 'debug':
    # Debug mode: build the dataset and iterator chain, then print each batch.
    tokenizer = WordToCharTokenizer()
    text_field = Field(tokenize=tokenizer, batch_first=True)
    ds = RandomizedTextWindowDataset(params.dataset, text_field, params.window_size, topk=params.topk, newline_eos=False)
    text_field.build_vocab(ds)
    # presumably an 80/20 train/test split - confirm against ds.split.
    # `test_ds` is not used in the visible code.
    train_ds, test_ds = ds.split(0.8)
    # The trailing 0.0 is presumably the noise level (i.e. no noise) - TODO confirm.
    iterator = NoisedPreWindowedIterator(train_ds, params.batch_size, params.window_size, 0.0)
    # The windowed iterator is wrapped, reusing the name `iterator`.
    iterator = PredictMiddleNoisedWindowIterator(iterator, 1)
    for b in iterator:
        print(b)
    # NOTE(review): `i` is not read anywhere in the visible code; this looks
    # like a breakpoint anchor. Confirm whether it belongs after the loop (as
    # laid out here) or inside it, and whether it can be removed.
    i = 1
    # NOTE(review): the commented-out code below appears to be an earlier
    # experiment; consider deleting it if it is no longer needed.
    # model = MLP(51, 27, 1024, 3)
    # text_field = Field(tokenize=tokenize, batch_first=True)
    # ds = SplittableLanguageModelingDataset(params.dataset, text_field, newline_eos=False)
    # text_field.build_vocab(ds)
    # train, test = ds.split()
    # model = MLP(51, len(text_field.vocab), 1024, 3)
    # iterator = PredictMiddleNoisedWindowIterator(ds, 64, 51, 0.1, 1)
    # for b in iterator: