Exemplo n.º 1
0
 def __init__(self, *args, **kwargs):
     '''
     Set up the model; everything except ``field`` goes to the parent.

     field (optional keyword): stored as ``self.field``. It is 'sentence'
     by default, but can be e.g. pos_sentence.
     '''
     # Take 'field' out of kwargs first so the parent constructor never
     # receives it.
     selected_field = kwargs.pop('field', 'sentence')
     self.field = selected_field
     super(LanguageModel, self).__init__(*args, **kwargs)

     # Class labels and the integer code assigned to each.
     self.class_to_code = {
         'VBZ': 0,
         'VBP': 1,
     }

     # Only the first of the two values returned by the helper is kept.
     # NOTE(review): self.vocab_file is presumably set by the parent
     # constructor -- confirm.
     verb_inflector, _unused = gen_inflect_from_vocab(self.vocab_file)
     self.inflect_verb = verb_inflector
Exemplo n.º 2
0
    def __init__(self,
                 infile,
                 modes=('infreq_pos', ),
                 most_common=10000,
                 skip=0,
                 stop_after=None,
                 verbose=True,
                 criterion=None,
                 vocab_file=filenames.vocab_file):
        '''
        Store the extraction settings and load the vocabulary helpers.

        infile:
            stored as self.infile

        modes is a tuple of one or more of the following modes:
            'word' - write actual words
            'pos' - replace words with their part of speech
            'infreq_pos' - replace infrequent words with their part of speech
        or None, in which case all modes are produced.

        most_common:
            if mode is 'infreq_pos', only retain this number of words,
            replace the rest with part of speech

        skip:
            number of sentences to skip after each sentence (to avoid all
            sentences starting with the same words if the corpus is sorted)

        stop_after:
            stored as self.stop_after (presumably a limit on how much of
            the input is processed -- confirm against the code that reads it)

        verbose:
            stored as self.verbose

        criterion:
            None, or function that take a dict representing a dependency and
            returns True if the dependency should be kept

        vocab_file:
            passed to self.load_freq_dict and to gen_inflect_from_vocab

        Raises ValueError if modes contains anything other than the three
        allowed mode names.
        '''
        # Validate the requested modes first so that an invalid call fails
        # before load_freq_dict / gen_inflect_from_vocab are invoked.
        allowed_modes = ('word', 'pos', 'infreq_pos')
        requested_modes = allowed_modes if modes is None else modes
        if set(requested_modes) - set(allowed_modes):
            # allowed_modes is itself a tuple, so it has to be wrapped in a
            # one-element tuple: "'%s' % allowed_modes" would treat its three
            # items as three format arguments and raise TypeError instead of
            # the intended ValueError.
            raise ValueError('Only the following modes are allowed: %s' %
                             (allowed_modes,))
        self.modes = requested_modes

        self.infile = infile
        self.skip = skip
        self.most_common = most_common
        self.stop_after = stop_after
        self.load_freq_dict(vocab_file)
        self.verbose = verbose
        self.inflect_verb, self.inflect_noun = gen_inflect_from_vocab(
            vocab_file)
        self.criterion = criterion
 def __init__(self, *args, **kwargs):
     '''
     Initialise via RNNAcceptor, then add the label maps and verb inflector.

     All positional and keyword arguments are forwarded unchanged to
     RNNAcceptor.__init__.
     '''
     RNNAcceptor.__init__(self, *args, **kwargs)

     # Forward map (label -> code) and its inverse (code -> label).
     label_codes = {'grammatical': 0, 'ungrammatical': 1}
     self.class_to_code = label_codes
     self.code_to_class = dict(
         (code, label) for label, code in label_codes.items())

     # Only the first of the two values returned by the helper is kept.
     # NOTE(review): self.vocab_file is presumably set by
     # RNNAcceptor.__init__ -- confirm.
     verb_inflector, _unused = gen_inflect_from_vocab(self.vocab_file)
     self.inflect_verb = verb_inflector
 def __init__(self, *args, **kwargs):
     '''
     Run the parent constructor, then attach the verb inflector.

     All positional and keyword arguments are forwarded unchanged to the
     parent class.
     '''
     super(InflectVerb, self).__init__(*args, **kwargs)

     # Only the first of the two values returned by the helper is kept.
     # NOTE(review): self.vocab_file is presumably set by the parent
     # constructor -- confirm.
     verb_inflector, _unused = gen_inflect_from_vocab(self.vocab_file)
     self.inflect_verb = verb_inflector