class DNDMVModelRunner(Runner):
    """Runner that builds a ``DNDMVModel`` and loads the data it is run on."""

    def __init__(self, o: DNMDVModelOptions):
        """Create the model and initialise the base ``Runner``.

        Args:
            o: Options object. When ``o.use_softmax_em`` is truthy,
                ``utils.common.cpf`` is rebound to ``cp.float64`` before the
                model is constructed.
        """
        if o.use_softmax_em:
            # Imported lazily: the override is only needed in this mode.
            from utils import common
            common.cpf = cp.float64
        m = DNDMVModel(o, self)
        super().__init__(m, o, Logger(o))

    def load(self):
        """Load the POS vocabulary, datasets and optional word embeddings.

        Reads ``o.vocab_path`` (one stripped entry per line) and sets
        ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds`` and
        ``word_emb`` on the runner. ``pretrained_ds`` and ``word_emb`` are
        ``None`` when the corresponding option is falsy.
        """
        # Use a context manager so the vocabulary file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(self.o.vocab_path) as vocab_file:
            pos_vocab_list = [w.strip() for w in vocab_file]
        pos_vocab = Vocab.from_list(pos_vocab_list)

        self.train_ds = ConllDataset(self.o.train_ds, pos_vocab=pos_vocab)
        self.dev_ds = ConllDataset(self.o.dev_ds, pos_vocab=pos_vocab)
        self.test_ds = ConllDataset(self.o.test_ds, pos_vocab=pos_vocab)
        if self.o.pretrained_ds:
            self.pretrained_ds = ConllDataset(self.o.pretrained_ds,
                                              pos_vocab=pos_vocab)
        else:
            self.pretrained_ds = None

        # Only the evaluation splits are batched here; the training split is
        # left unbatched by this method.
        self.dev_ds.build_batchs(self.o.batch_size)
        self.test_ds.build_batchs(self.o.batch_size)

        if self.o.emb_path:
            self.word_emb = np.load(self.o.emb_path)
        else:
            self.word_emb = None
def load(self):
    """Create the train/dev/test datasets and batch the dev and test splits.

    All three datasets use ``WSJ_POS`` as the POS vocabulary; the training
    dataset is additionally created with ``sort=True``. Dev and test are
    batched with ``o.batch_size``.
    """
    opts = self.o
    self.train_ds = ConllDataset(opts.train_ds, pos_vocab=WSJ_POS, sort=True)
    self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
    self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

    # The training split is not batched by this method.
    for eval_ds in (self.dev_ds, self.test_ds):
        eval_ds.build_batchs(opts.batch_size)
def load(self):
    """Create the train/dev/test datasets and fix length/tag options.

    All datasets use ``WSJ_POS`` as the POS vocabulary. Dev and test are
    batched with a batch size of 10000, then ``o.max_len`` is set to 10 and
    ``o.num_tag`` to ``len(WSJ_POS)``.
    """
    opts = self.o
    self.train_ds = ConllDataset(opts.train_ds, pos_vocab=WSJ_POS)
    self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
    self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

    # The training split is not batched by this method.
    for eval_ds in (self.dev_ds, self.test_ds):
        eval_ds.build_batchs(10000)

    opts.max_len = 10
    opts.num_tag = len(WSJ_POS)
class DMVModelRunner(Runner):
    """Runner that builds a ``DMVModel`` and loads WSJ-POS-tagged datasets."""

    def __init__(self, o: DMVModelOptions):
        """Create the model, initialise the base ``Runner`` and print the workspace.

        Args:
            o: Options object passed to both the model and the base runner.
        """
        model = DMVModel(o, self)
        super().__init__(model, o, Logger())
        print(self.workspace)

    def load(self):
        """Create the train/dev/test datasets and fix length/tag options.

        Dev and test are batched with a batch size of 10000, then
        ``o.max_len`` is set to 10 and ``o.num_tag`` to ``len(WSJ_POS)``.
        """
        opts = self.o
        self.train_ds = ConllDataset(opts.train_ds, pos_vocab=WSJ_POS)
        self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
        self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

        # The training split is not batched by this method.
        for eval_ds in (self.dev_ds, self.test_ds):
            eval_ds.build_batchs(10000)

        opts.max_len = 10
        opts.num_tag = len(WSJ_POS)
class NDMVModelRunner(Runner):
    """Runner that builds an ``NDMVModel`` and loads WSJ-POS-tagged datasets."""

    def __init__(self, o):
        """Create the model and initialise the base ``Runner``.

        Args:
            o: Options object. When ``o.use_softmax_em`` is truthy,
                ``utils.common.cpf`` is rebound to ``cp.float64`` before the
                model is constructed.
        """
        if o.use_softmax_em:
            # Imported lazily: the override is only needed in this mode.
            from utils import common
            common.cpf = cp.float64
        model = NDMVModel(o, self)
        super().__init__(model, o, Logger(o))

    def load(self):
        """Create the train/dev/test datasets and batch the dev and test splits.

        The training dataset is created with ``sort=True``; dev and test are
        batched with ``o.batch_size``.
        """
        opts = self.o
        self.train_ds = ConllDataset(opts.train_ds, pos_vocab=WSJ_POS,
                                     sort=True)
        self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
        self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

        # The training split is not batched by this method.
        for eval_ds in (self.dev_ds, self.test_ds):
            eval_ds.build_batchs(opts.batch_size)
def load(self):
    """Load the word vocabulary, datasets and optional embeddings.

    Only the first ``o.num_lex + 2`` lines of ``o.vocab_path`` are used
    (presumably the two extra entries are ``<UNK>`` and ``<PAD>`` - confirm
    against the vocabulary file).

    When ``o.use_pair`` is truthy, each line is split on whitespace: the
    first token is the word and any remaining tokens are looked up in
    ``BLLIP_POS``. ``self.pos_dict`` then maps ``word_vocab[word]`` to the
    list of looked-up values for every line with at least one extra token,
    and ``o.num_lex`` is overwritten with the total number of such values.
    Otherwise each stripped line is taken as one word.

    Sets ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds``,
    ``word_emb``, ``out_pos_emb`` and ``pos_emb`` on the runner (the last
    four are ``None`` when their option is falsy), and overwrites
    ``o.max_len`` and ``o.num_tag``.
    """
    if self.o.use_pair:
        # Context manager closes the vocabulary file deterministically.
        with open(self.o.vocab_path) as vocab_file:
            wordpos_vocab_list = [
                w.strip().split() for w in vocab_file
            ][:self.o.num_lex + 2]
        word_vocab_list = [wp[0] for wp in wordpos_vocab_list]
        word_vocab = Vocab.from_list(word_vocab_list,
                                     unk='<UNK>',
                                     pad='<PAD>')
        self.pos_dict = {
            word_vocab[wp[0]]: list(map(BLLIP_POS.__getitem__, wp[1:]))
            for wp in wordpos_vocab_list if len(wp) > 1
        }
    else:
        with open(self.o.vocab_path) as vocab_file:
            word_vocab_list = [w.strip() for w in vocab_file
                               ][:self.o.num_lex + 2]
        word_vocab = Vocab.from_list(word_vocab_list,
                                     unk='<UNK>',
                                     pad='<PAD>')

    self.train_ds = ConllDataset(self.o.train_ds,
                                 pos_vocab=BLLIP_POS,
                                 word_vocab=word_vocab)
    self.dev_ds = ConllDataset(self.o.dev_ds,
                               pos_vocab=BLLIP_POS,
                               word_vocab=word_vocab)
    self.test_ds = ConllDataset(self.o.test_ds,
                                pos_vocab=BLLIP_POS,
                                word_vocab=word_vocab)
    if self.o.pretrained_ds:
        self.pretrained_ds = ConllDataset(self.o.pretrained_ds,
                                          pos_vocab=BLLIP_POS,
                                          word_vocab=word_vocab)
    else:
        self.pretrained_ds = None

    # Only the evaluation splits are batched here.
    self.dev_ds.build_batchs(self.o.batch_size)
    self.test_ds.build_batchs(self.o.batch_size)

    if self.o.use_pair:
        # Replace the configured lexicon size with the number of
        # (word, tag) pairs collected in ``pos_dict``.
        self.o.num_lex = sum(len(p) for p in self.pos_dict.values())
    self.o.max_len = 10
    self.o.num_tag = len(BLLIP_POS) + self.o.num_lex

    if self.o.emb_path:
        # Sliced with the current ``num_lex``, which may have been
        # overwritten above when ``use_pair`` is set.
        self.word_emb = np.load(self.o.emb_path)[:self.o.num_lex + 2]
    else:
        self.word_emb = None
    self.out_pos_emb = np.load(
        self.o.out_pos_emb_path) if self.o.out_pos_emb_path else None
    self.pos_emb = np.load(
        self.o.pos_emb_path) if self.o.pos_emb_path else None
def load(self):
    """Load the POS vocabulary, datasets and optional word embeddings.

    Reads ``o.vocab_path`` (one stripped entry per line) and sets
    ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds`` and ``word_emb``
    on the runner. ``pretrained_ds`` and ``word_emb`` are ``None`` when the
    corresponding option is falsy.
    """
    # Use a context manager so the vocabulary file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(self.o.vocab_path) as vocab_file:
        pos_vocab_list = [w.strip() for w in vocab_file]
    pos_vocab = Vocab.from_list(pos_vocab_list)

    self.train_ds = ConllDataset(self.o.train_ds, pos_vocab=pos_vocab)
    self.dev_ds = ConllDataset(self.o.dev_ds, pos_vocab=pos_vocab)
    self.test_ds = ConllDataset(self.o.test_ds, pos_vocab=pos_vocab)
    if self.o.pretrained_ds:
        self.pretrained_ds = ConllDataset(self.o.pretrained_ds,
                                          pos_vocab=pos_vocab)
    else:
        self.pretrained_ds = None

    # Only the evaluation splits are batched here; the training split is
    # left unbatched by this method.
    self.dev_ds.build_batchs(self.o.batch_size)
    self.test_ds.build_batchs(self.o.batch_size)

    if self.o.emb_path:
        self.word_emb = np.load(self.o.emb_path)
    else:
        self.word_emb = None
class LNDMVModelRunner(Runner):
    """Runner that builds an ``LNDMVModel`` and loads word-level datasets."""

    def __init__(self, o: LNMDVModelOptions):
        """Create the model and initialise the base ``Runner``.

        Args:
            o: Options object. When ``o.use_softmax_em`` is truthy,
                ``utils.common.cpf`` is rebound to ``cp.float64`` before the
                model is constructed.
        """
        if o.use_softmax_em:
            # Imported lazily: the override is only needed in this mode.
            from utils import common
            common.cpf = cp.float64
        m = LNDMVModel(o, self)
        super().__init__(m, o, Logger(o))

    def load(self):
        """Load the word vocabulary, datasets and optional embeddings.

        Only the first ``o.num_lex + 2`` lines of ``o.vocab_path`` are used
        (presumably the two extra entries are ``<UNK>`` and ``<PAD>`` -
        confirm against the vocabulary file).

        When ``o.use_pair`` is truthy, each line is split on whitespace: the
        first token is the word and any remaining tokens are looked up in
        ``BLLIP_POS``. ``self.pos_dict`` then maps ``word_vocab[word]`` to
        the list of looked-up values for every line with at least one extra
        token, and ``o.num_lex`` is overwritten with the total number of such
        values. Otherwise each stripped line is taken as one word.

        Sets ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds``,
        ``word_emb``, ``out_pos_emb`` and ``pos_emb`` on the runner (the last
        four are ``None`` when their option is falsy), and overwrites
        ``o.max_len`` and ``o.num_tag``.
        """
        if self.o.use_pair:
            # Context manager closes the vocabulary file deterministically.
            with open(self.o.vocab_path) as vocab_file:
                wordpos_vocab_list = [
                    w.strip().split() for w in vocab_file
                ][:self.o.num_lex + 2]
            word_vocab_list = [wp[0] for wp in wordpos_vocab_list]
            word_vocab = Vocab.from_list(word_vocab_list,
                                         unk='<UNK>',
                                         pad='<PAD>')
            self.pos_dict = {
                word_vocab[wp[0]]: list(map(BLLIP_POS.__getitem__, wp[1:]))
                for wp in wordpos_vocab_list if len(wp) > 1
            }
        else:
            with open(self.o.vocab_path) as vocab_file:
                word_vocab_list = [w.strip() for w in vocab_file
                                   ][:self.o.num_lex + 2]
            word_vocab = Vocab.from_list(word_vocab_list,
                                         unk='<UNK>',
                                         pad='<PAD>')

        self.train_ds = ConllDataset(self.o.train_ds,
                                     pos_vocab=BLLIP_POS,
                                     word_vocab=word_vocab)
        self.dev_ds = ConllDataset(self.o.dev_ds,
                                   pos_vocab=BLLIP_POS,
                                   word_vocab=word_vocab)
        self.test_ds = ConllDataset(self.o.test_ds,
                                    pos_vocab=BLLIP_POS,
                                    word_vocab=word_vocab)
        if self.o.pretrained_ds:
            self.pretrained_ds = ConllDataset(self.o.pretrained_ds,
                                              pos_vocab=BLLIP_POS,
                                              word_vocab=word_vocab)
        else:
            self.pretrained_ds = None

        # Only the evaluation splits are batched here.
        self.dev_ds.build_batchs(self.o.batch_size)
        self.test_ds.build_batchs(self.o.batch_size)

        if self.o.use_pair:
            # Replace the configured lexicon size with the number of
            # (word, tag) pairs collected in ``pos_dict``.
            self.o.num_lex = sum(len(p) for p in self.pos_dict.values())
        self.o.max_len = 10
        self.o.num_tag = len(BLLIP_POS) + self.o.num_lex

        if self.o.emb_path:
            # Sliced with the current ``num_lex``, which may have been
            # overwritten above when ``use_pair`` is set.
            self.word_emb = np.load(self.o.emb_path)[:self.o.num_lex + 2]
        else:
            self.word_emb = None
        self.out_pos_emb = np.load(
            self.o.out_pos_emb_path) if self.o.out_pos_emb_path else None
        self.pos_emb = np.load(
            self.o.pos_emb_path) if self.o.pos_emb_path else None