예제 #1
0
class DNDMVModelRunner(Runner):
    """Runner that builds a ``DNDMVModel`` and loads its datasets."""

    def __init__(self, o: DNMDVModelOptions):
        """Create the model and initialise the base ``Runner``.

        Args:
            o: Options object. ``o.use_softmax_em`` is read here; the object
                is also passed to the model, the logger and the base class.
        """
        if o.use_softmax_em:
            # Override the module-level ``cpf`` in ``utils.common`` with
            # float64 (presumably the float dtype used elsewhere -- confirm).
            from utils import common
            common.cpf = cp.float64

        # The model is built before the base initialiser runs and receives
        # this (not yet fully initialised) runner.
        m = DNDMVModel(o, self)
        super().__init__(m, o, Logger(o))

    def load(self):
        """Load the POS vocabulary, the datasets and optional embeddings.

        Sets ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds`` (or
        ``None``) and ``word_emb`` (or ``None``) on the runner, and builds
        batches of ``o.batch_size`` for the dev and test datasets.
        """
        # Read the vocabulary inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(self.o.vocab_path) as vocab_file:
            pos_vocab_list = [w.strip() for w in vocab_file]
        pos_vocab = Vocab.from_list(pos_vocab_list)

        self.train_ds = ConllDataset(self.o.train_ds, pos_vocab=pos_vocab)
        self.dev_ds = ConllDataset(self.o.dev_ds, pos_vocab=pos_vocab)
        self.test_ds = ConllDataset(self.o.test_ds, pos_vocab=pos_vocab)

        if self.o.pretrained_ds:
            self.pretrained_ds = ConllDataset(self.o.pretrained_ds, pos_vocab=pos_vocab)
        else:
            self.pretrained_ds = None

        # Only dev and test are batched here; train_ds is left unbatched.
        self.dev_ds.build_batchs(self.o.batch_size)
        self.test_ds.build_batchs(self.o.batch_size)

        if self.o.emb_path:
            self.word_emb = np.load(self.o.emb_path)
        else:
            self.word_emb = None
예제 #2
0
    def load(self):
        """Create the train/dev/test datasets and batch the evaluation sets.

        All three datasets use the ``WSJ_POS`` vocabulary; the training set
        is additionally created with ``sort=True``. The dev and test sets are
        batched with ``o.batch_size``.
        """
        opts = self.o

        self.train_ds = ConllDataset(opts.train_ds, sort=True, pos_vocab=WSJ_POS)
        self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
        self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

        # Only the evaluation splits are batched here.
        for eval_ds in (self.dev_ds, self.test_ds):
            eval_ds.build_batchs(opts.batch_size)
예제 #3
0
    def load(self):
        """Create the datasets, batch dev/test and fill in derived options.

        All datasets use the ``WSJ_POS`` vocabulary. The dev and test sets
        are batched with a fixed batch size of 10000. Afterwards
        ``o.max_len`` is set to 10 and ``o.num_tag`` to ``len(WSJ_POS)``.
        """
        opts = self.o

        self.train_ds = ConllDataset(opts.train_ds, pos_vocab=WSJ_POS)
        self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
        self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

        eval_batch_size = 10000
        for eval_ds in (self.dev_ds, self.test_ds):
            eval_ds.build_batchs(eval_batch_size)

        # Options overwritten by the loader.
        opts.max_len = 10
        opts.num_tag = len(WSJ_POS)
예제 #4
0
class DMVModelRunner(Runner):
    """Runner that builds a ``DMVModel`` and loads its WSJ datasets."""

    def __init__(self, o: DMVModelOptions):
        """Create the model, initialise the base runner, print the workspace.

        Args:
            o: Options object passed to the model and the base ``Runner``.
        """
        model = DMVModel(o, self)
        super().__init__(model, o, Logger())

        print(self.workspace)

    def load(self):
        """Create the datasets, batch dev/test and fill in derived options.

        All datasets use the ``WSJ_POS`` vocabulary. The dev and test sets
        are batched with a fixed batch size of 10000. Afterwards
        ``o.max_len`` is set to 10 and ``o.num_tag`` to ``len(WSJ_POS)``.
        """
        opts = self.o

        self.train_ds = ConllDataset(opts.train_ds, pos_vocab=WSJ_POS)
        self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
        self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

        eval_batch_size = 10000
        for eval_ds in (self.dev_ds, self.test_ds):
            eval_ds.build_batchs(eval_batch_size)

        # Options overwritten by the loader.
        opts.max_len = 10
        opts.num_tag = len(WSJ_POS)
예제 #5
0
class NDMVModelRunner(Runner):
    """Runner that builds an ``NDMVModel`` and loads its WSJ datasets."""

    def __init__(self, o):
        """Create the model and initialise the base ``Runner``.

        Args:
            o: Options object. ``o.use_softmax_em`` is read here; the object
                is also passed to the model, the logger and the base class.
        """
        if o.use_softmax_em:
            # Override the module-level ``cpf`` in ``utils.common`` with
            # float64.
            from utils import common
            common.cpf = cp.float64

        model = NDMVModel(o, self)
        super().__init__(model, o, Logger(o))

    def load(self):
        """Create the train/dev/test datasets and batch the evaluation sets.

        All three datasets use the ``WSJ_POS`` vocabulary; the training set
        is additionally created with ``sort=True``. The dev and test sets are
        batched with ``o.batch_size``.
        """
        opts = self.o

        self.train_ds = ConllDataset(opts.train_ds, sort=True, pos_vocab=WSJ_POS)
        self.dev_ds = ConllDataset(opts.dev_ds, pos_vocab=WSJ_POS)
        self.test_ds = ConllDataset(opts.test_ds, pos_vocab=WSJ_POS)

        # Only the evaluation splits are batched here.
        for eval_ds in (self.dev_ds, self.test_ds):
            eval_ds.build_batchs(opts.batch_size)
예제 #6
0
    def load(self):
        """Load the word vocabulary, the datasets and optional embeddings.

        Reads the first ``o.num_lex + 2`` lines of ``o.vocab_path``. In pair
        mode (``o.use_pair``) each line is split on whitespace into a word
        followed by POS tags, and ``self.pos_dict`` maps the word's vocab id
        to the ``BLLIP_POS`` ids of its tags; otherwise each stripped line is
        one word.

        Sets ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds``,
        ``word_emb``, ``out_pos_emb`` and ``pos_emb`` on the runner (the
        optional ones are ``None`` when their path/option is falsy), builds
        dev/test batches, and overwrites ``o.max_len``, ``o.num_tag`` and, in
        pair mode, ``o.num_lex``.
        """
        # Read the vocabulary inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(self.o.vocab_path) as vocab_file:
            vocab_lines = [w.strip() for w in vocab_file][:self.o.num_lex + 2]

        if self.o.use_pair:
            wordpos_vocab_list = [line.split() for line in vocab_lines]
            word_vocab_list = [wp[0] for wp in wordpos_vocab_list]
            word_vocab = Vocab.from_list(word_vocab_list,
                                         unk='<UNK>',
                                         pad='<PAD>')
            # Lines that carry only a word (no tags) get no pos_dict entry.
            self.pos_dict = {
                word_vocab[wp[0]]: list(map(BLLIP_POS.__getitem__, wp[1:]))
                for wp in wordpos_vocab_list if len(wp) > 1
            }
        else:
            word_vocab = Vocab.from_list(vocab_lines,
                                         unk='<UNK>',
                                         pad='<PAD>')

        self.train_ds = ConllDataset(self.o.train_ds,
                                     pos_vocab=BLLIP_POS,
                                     word_vocab=word_vocab)

        self.dev_ds = ConllDataset(self.o.dev_ds,
                                   pos_vocab=BLLIP_POS,
                                   word_vocab=word_vocab)
        self.test_ds = ConllDataset(self.o.test_ds,
                                    pos_vocab=BLLIP_POS,
                                    word_vocab=word_vocab)

        if self.o.pretrained_ds:
            self.pretrained_ds = ConllDataset(self.o.pretrained_ds,
                                              pos_vocab=BLLIP_POS,
                                              word_vocab=word_vocab)
        else:
            self.pretrained_ds = None

        # Only dev and test are batched here; train_ds is left unbatched.
        self.dev_ds.build_batchs(self.o.batch_size)
        self.test_ds.build_batchs(self.o.batch_size)

        if self.o.use_pair:
            # In pair mode num_lex becomes the total number of (word, tag)
            # pairs recorded in pos_dict.
            self.o.num_lex = sum(len(p) for p in self.pos_dict.values())
        self.o.max_len = 10
        self.o.num_tag = len(BLLIP_POS) + self.o.num_lex

        if self.o.emb_path:
            # NOTE(review): this slice uses num_lex *after* it may have been
            # overwritten above in pair mode -- confirm that is intended.
            self.word_emb = np.load(self.o.emb_path)[:self.o.num_lex + 2]
        else:
            self.word_emb = None
        self.out_pos_emb = np.load(
            self.o.out_pos_emb_path) if self.o.out_pos_emb_path else None
        self.pos_emb = np.load(
            self.o.pos_emb_path) if self.o.pos_emb_path else None
예제 #7
0
    def load(self):
        """Load the POS vocabulary, the datasets and optional embeddings.

        Sets ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds`` (or
        ``None``) and ``word_emb`` (or ``None``) on the runner, and builds
        batches of ``o.batch_size`` for the dev and test datasets.
        """
        # Read the vocabulary inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(self.o.vocab_path) as vocab_file:
            pos_vocab_list = [w.strip() for w in vocab_file]
        pos_vocab = Vocab.from_list(pos_vocab_list)

        self.train_ds = ConllDataset(self.o.train_ds, pos_vocab=pos_vocab)
        self.dev_ds = ConllDataset(self.o.dev_ds, pos_vocab=pos_vocab)
        self.test_ds = ConllDataset(self.o.test_ds, pos_vocab=pos_vocab)

        if self.o.pretrained_ds:
            self.pretrained_ds = ConllDataset(self.o.pretrained_ds, pos_vocab=pos_vocab)
        else:
            self.pretrained_ds = None

        # Only dev and test are batched here; train_ds is left unbatched.
        self.dev_ds.build_batchs(self.o.batch_size)
        self.test_ds.build_batchs(self.o.batch_size)

        if self.o.emb_path:
            self.word_emb = np.load(self.o.emb_path)
        else:
            self.word_emb = None
예제 #8
0
class LNDMVModelRunner(Runner):
    """Runner that builds an ``LNDMVModel`` and loads its lexicalised data."""

    def __init__(self, o: LNMDVModelOptions):
        """Create the model and initialise the base ``Runner``.

        Args:
            o: Options object. ``o.use_softmax_em`` is read here; the object
                is also passed to the model, the logger and the base class.
        """
        if o.use_softmax_em:
            # Override the module-level ``cpf`` in ``utils.common`` with
            # float64 (presumably the float dtype used elsewhere -- confirm).
            from utils import common
            common.cpf = cp.float64

        # The model is built before the base initialiser runs and receives
        # this (not yet fully initialised) runner.
        m = LNDMVModel(o, self)
        super().__init__(m, o, Logger(o))

    def load(self):
        """Load the word vocabulary, the datasets and optional embeddings.

        Reads the first ``o.num_lex + 2`` lines of ``o.vocab_path``. In pair
        mode (``o.use_pair``) each line is split on whitespace into a word
        followed by POS tags, and ``self.pos_dict`` maps the word's vocab id
        to the ``BLLIP_POS`` ids of its tags; otherwise each stripped line is
        one word.

        Sets ``train_ds``, ``dev_ds``, ``test_ds``, ``pretrained_ds``,
        ``word_emb``, ``out_pos_emb`` and ``pos_emb`` on the runner (the
        optional ones are ``None`` when their path/option is falsy), builds
        dev/test batches, and overwrites ``o.max_len``, ``o.num_tag`` and, in
        pair mode, ``o.num_lex``.
        """
        # Read the vocabulary inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(self.o.vocab_path) as vocab_file:
            vocab_lines = [w.strip() for w in vocab_file][:self.o.num_lex + 2]

        if self.o.use_pair:
            wordpos_vocab_list = [line.split() for line in vocab_lines]
            word_vocab_list = [wp[0] for wp in wordpos_vocab_list]
            word_vocab = Vocab.from_list(word_vocab_list,
                                         unk='<UNK>',
                                         pad='<PAD>')
            # Lines that carry only a word (no tags) get no pos_dict entry.
            self.pos_dict = {
                word_vocab[wp[0]]: list(map(BLLIP_POS.__getitem__, wp[1:]))
                for wp in wordpos_vocab_list if len(wp) > 1
            }
        else:
            word_vocab = Vocab.from_list(vocab_lines,
                                         unk='<UNK>',
                                         pad='<PAD>')

        self.train_ds = ConllDataset(self.o.train_ds,
                                     pos_vocab=BLLIP_POS,
                                     word_vocab=word_vocab)

        self.dev_ds = ConllDataset(self.o.dev_ds,
                                   pos_vocab=BLLIP_POS,
                                   word_vocab=word_vocab)
        self.test_ds = ConllDataset(self.o.test_ds,
                                    pos_vocab=BLLIP_POS,
                                    word_vocab=word_vocab)

        if self.o.pretrained_ds:
            self.pretrained_ds = ConllDataset(self.o.pretrained_ds,
                                              pos_vocab=BLLIP_POS,
                                              word_vocab=word_vocab)
        else:
            self.pretrained_ds = None

        # Only dev and test are batched here; train_ds is left unbatched.
        self.dev_ds.build_batchs(self.o.batch_size)
        self.test_ds.build_batchs(self.o.batch_size)

        if self.o.use_pair:
            # In pair mode num_lex becomes the total number of (word, tag)
            # pairs recorded in pos_dict.
            self.o.num_lex = sum(len(p) for p in self.pos_dict.values())
        self.o.max_len = 10
        self.o.num_tag = len(BLLIP_POS) + self.o.num_lex

        if self.o.emb_path:
            # NOTE(review): this slice uses num_lex *after* it may have been
            # overwritten above in pair mode -- confirm that is intended.
            self.word_emb = np.load(self.o.emb_path)[:self.o.num_lex + 2]
        else:
            self.word_emb = None
        self.out_pos_emb = np.load(
            self.o.out_pos_emb_path) if self.o.out_pos_emb_path else None
        self.pos_emb = np.load(
            self.o.pos_emb_path) if self.o.pos_emb_path else None