Exemplo n.º 1
0
    def __setattr__(self, key, value):
        """Set an attribute, deriving ``_conll_idx`` when ``_vocabs`` is set.

        Assigning ``_vocabs`` gathers the ``conll_idx`` of every member that
        defines one, asserts that exactly one distinct value was found, and
        stores that value as ``_conll_idx`` before ``_vocabs`` itself is
        stored. Every other key is passed straight to the parent class.
        """
        if key == '_vocabs':
            distinct_idxs = set()
            for member in value:
                # Members without a conll_idx attribute are ignored.
                if hasattr(member, 'conll_idx'):
                    distinct_idxs.add(member.conll_idx)
            # The members that expose a conll_idx must share a single value.
            assert len(distinct_idxs) == 1
            self._conll_idx = list(distinct_idxs)[0]
        super(Multivocab, self).__setattr__(key, value)


#***************************************************************
if __name__ == '__main__':
    # Smoke test: build a Multivocab out of three member vocabs, add the
    # configured valid_files, index the tokens, and print the indices
    # returned for a few sample tokens.
    from parser.vocabs import PretrainedVocab, WordVocab, CharVocab, Multivocab

    # NOTE(review): Configurable is not imported inside this block;
    # presumably it is imported at the top of the file -- confirm.
    config = Configurable()
    word_vocab = WordVocab.from_configurable(config)
    # Members are created and listed in the order: pretrained, word, char.
    members = [
        PretrainedVocab.from_vocab(word_vocab),
        word_vocab,
        CharVocab.from_vocab(word_vocab),
    ]
    combined = Multivocab.from_configurable(config, members)
    combined.add_files(config.valid_files)
    combined.index_tokens()

    pad_indices = str(combined.index('<PAD>'))
    print("Indices for '<PAD>': %s" % pad_indices)
    lower_indices = str(combined.index('the'))
    print("Indices for 'the': %s" % lower_indices)
    title_indices = str(combined.index('The'))
    print("Indices for 'The': %s" % title_indices)
    print('Multivocab passes')
Exemplo n.º 2
0
    matrix = self.embeddings.eval()
    with codecs.open(self.name+'.txt', 'w') as f:
      for idx in xrange(self.START_IDX, len(self)):
        f.write('%s %s\n' % (self[idx], ' '.join(matrix[idx])))
    return
  
  #=============================================================
  @property
  def pretrained_vocab(self):
    """Return the object currently stored in ``self._pretrained_vocab``."""
    return self._pretrained_vocab
  
  #=============================================================
  def __setattr__(self, name, value):
    """Set an attribute, sharing tables when ``_pretrained_vocab`` is set.

    Assigning ``_pretrained_vocab`` first rebinds this object's ``_str2idx``,
    ``_idx2str`` and ``_counts`` to the very same objects held by ``value``
    (no copies are made), and then stores ``value`` itself. Every other name
    is passed straight to the parent class.
    """
    if name == '_pretrained_vocab':
      # Alias the lookup tables in a fixed order: _str2idx, _idx2str, _counts.
      for shared in ('_str2idx', '_idx2str', '_counts'):
        setattr(self, shared, getattr(value, shared))
    super(RetrainedVocab, self).__setattr__(name, value)

#***************************************************************
if __name__ == '__main__':
  # Smoke test: build a PretrainedVocab from a configuration, derive a
  # RetrainedVocab from it, call the retrained vocab on the pretrained one,
  # and report success if nothing raised.
  from parser import Configurable
  from parser.vocabs import PretrainedVocab

  retrained_options = {'embed_loss':'cross_entropy', 'retrained_embed_size':50}
  config = Configurable(retrained_vocab=retrained_options)
  source_vocab = PretrainedVocab.from_configurable(config)
  vocab = RetrainedVocab.from_vocab(source_vocab)
  # The result is only bound, never inspected; presumably it is the
  # retraining loss -- confirm against RetrainedVocab.__call__.
  loss = vocab(source_vocab)
  print('RetrainedVocab passes')