示例#1
0
    def __init__(self, backoff=None):
        """Set up a SequentialBackoffLemmatizer.

        Calls the ``LemmatizerI`` initializer (no arguments) and then the
        ``SequentialBackoffTagger`` initializer explicitly, rather than
        going through ``super()``.

        :param backoff: Next lemmatizer in backoff chain; forwarded unchanged
            to ``SequentialBackoffTagger.__init__``. Defaults to ``None``.
        """
        LemmatizerI.__init__(self)
        SequentialBackoffTagger.__init__(self, backoff)
 def __init__(self, list_of_drugs, backoff=None):
     """Load a drug-name lookup table from a text file.

     :param list_of_drugs: Path to a text file with one drug name per line.
         Leading and trailing whitespace is stripped from every line.
     :param backoff: Forwarded unchanged to
         ``SequentialBackoffTagger.__init__``. Defaults to ``None``.
     """
     SequentialBackoffTagger.__init__(self, backoff)
     # The context manager guarantees the file handle is closed, even if
     # reading fails part-way through.
     with open(list_of_drugs, 'r') as drug_file:
         # Dict keyed by drug name (value is always 1) for O(1) lookup.
         self.drugs = {line.strip(): 1 for line in drug_file}
示例#3
0
文件: backoff.py 项目: vierth/cltk
    def __init__(self, backoff=None):
        """Set up a SequentialBackoffLemmatizer.

        Calls the ``LemmatizerI`` initializer (no arguments) and then the
        ``SequentialBackoffTagger`` initializer explicitly, rather than
        going through ``super()``.

        :param backoff: Next lemmatizer in backoff chain; forwarded unchanged
            to ``SequentialBackoffTagger.__init__``. Defaults to ``None``.
        """
        LemmatizerI.__init__(self)
        SequentialBackoffTagger.__init__(self, backoff)
 def __init__(self, list_of_drugs, backoff=None):
     """Load a drug-name lookup table from a text file.

     :param list_of_drugs: Path to a text file with one drug name per line.
         Leading and trailing whitespace is stripped from every line.
     :param backoff: Forwarded unchanged to
         ``SequentialBackoffTagger.__init__``. Defaults to ``None``.
     """
     SequentialBackoffTagger.__init__(self, backoff)
     # The context manager guarantees the file handle is closed, even if
     # reading fails part-way through.
     with open(list_of_drugs, 'r') as drug_file:
         # Dict keyed by drug name (value is always 1) for O(1) lookup.
         self.drugs = {line.strip(): 1 for line in drug_file}
示例#5
0
 def __init__(self, *args, **kwargs):
     """Initialize the tagger and load the COCA word/POS frequency table.

     Reads the tab-delimited file ``../data/coca_500k.csv`` (resolved
     relative to this module's directory) and registers every row through
     ``self.insertPair(word, pos, freq)``. Each row is read as
     ``frequency<TAB>word<TAB>pos``.

     :param args: Positional arguments forwarded to
         ``SequentialBackoffTagger.__init__``.
     :param kwargs: Keyword arguments forwarded to
         ``SequentialBackoffTagger.__init__``.
     :raises ValueError: If the first column of a row is not an integer.
     :raises IndexError: If a row has fewer than three columns.
     """
     SequentialBackoffTagger.__init__(self, *args, **kwargs)
     coca_path = os.path.join(os.path.dirname(__file__),
                              '../data/coca_500k.csv')
     self.tag_map = dict()
     # Close the CSV file deterministically instead of leaking the handle.
     with open(coca_path) as coca_file:
         for row in csv.reader(coca_file, delimiter='\t'):
             freq = int(row[0])
             word = row[1].strip()
             pos = row[2].strip()
             self.insertPair(word, pos, freq)
示例#6
0
    def __init__(self, *args, **kwargs):
        """Initialize the tagger and store the WordNet-to-CLAWS7 tag table.

        :param args: Positional arguments forwarded to
            ``SequentialBackoffTagger.__init__``.
        :param kwargs: Keyword arguments forwarded to
            ``SequentialBackoffTagger.__init__``.
        """
        SequentialBackoffTagger.__init__(self, *args, **kwargs)

        # WordNet POS letter -> CLAWS7 tag.
        wordnet_to_claws7 = (
            ('n', 'nn'),
            ('s', 'jj'),
            ('a', 'jj'),
            ('r', 'rr'),
            ('v', 'vv0'),
        )
        self.wordnet_tag_map = dict(wordnet_to_claws7)

        # Keep a reference to the module-level ``wordnet`` object.
        self.wordnet = wordnet
示例#7
0
    def __init__(self, *args, **kwargs):
        """Build a trigram -> bigram -> COCA -> names -> WordNet -> default chain.

        Training sentences are unpickled from
        ``pickles/brown_clawstags.pickle`` (relative to the current working
        directory). The resulting chain of taggers replaces this instance's
        own ``_taggers`` list.

        :param args: Positional arguments forwarded to
            ``SequentialBackoffTagger.__init__``.
        :param kwargs: Keyword arguments forwarded to
            ``SequentialBackoffTagger.__init__``.
        """
        SequentialBackoffTagger.__init__(self, *args, **kwargs)
        self.dist = FreqDist()

        # Alternative training source: brown.tagged_sents()
        # Pickles are binary data: open in 'rb' (text mode fails on Python 3)
        # and close the handle deterministically.
        # NOTE: pickle must only be used on trusted files; this one is
        # expected to ship with the project.
        with open("pickles/brown_clawstags.pickle", "rb") as pickle_file:
            train_sents = pickle.load(pickle_file)
        # Drop any token that is not a 2-element pair.
        train_sents = [[t for t in sentence if len(t) == 2]
                       for sentence in train_sents]

        default_tagger = DefaultTagger('nn')
        wn_tagger = WordNetTagger(default_tagger)
        names_tagger = NamesTagger(wn_tagger)
        coca_tagger = COCATagger(names_tagger)
        bigram_tagger = BigramTagger(train_sents, backoff=coca_tagger)
        trigram_tagger = TrigramTagger(train_sents, backoff=bigram_tagger)

        # doesn't include self cause it's a dumb tagger (would always return None)
        self._taggers = trigram_tagger._taggers
示例#8
0
    def __init__(self: object, backoff: object, verbose: bool = False):
        """
        Initialize a SequentialBackoffLemmatizer.

        The base initializer is called with ``backoff=None``; the chain of
        taggers is then assembled here, with this instance first.

        :param backoff: Next lemmatizer in backoff chain, or ``None``
        :param verbose: Flag to include which lemmatizer assigned in a given tag in the return tuple
        """
        SequentialBackoffTagger.__init__(self, backoff=None)

        # This instance always leads the chain; the backoff's own chain,
        # if there is one, follows it.
        downstream = [] if backoff is None else backoff._taggers
        self._taggers = [self] + downstream

        self.VERBOSE = verbose

        # Repr helper limited to one list item and one dict entry.
        shortener = reprlib.Repr()
        shortener.maxlist = 1
        shortener.maxdict = 1
        self.repr = shortener
示例#9
0
文件: backoff.py 项目: cltk/cltk
    def __init__(self: object, backoff: object, verbose: bool = False):
        """
        Initialize a SequentialBackoffLemmatizer.

        The base initializer is called with ``backoff=None``; the chain of
        taggers is then assembled here, with this instance first.

        :param backoff: Next lemmatizer in backoff chain, or ``None``
        :type verbose: bool
        :param verbose: Flag to include which lemmatizer assigned in
            a given tag in the return tuple
        """
        SequentialBackoffTagger.__init__(self, backoff=None)

        # This instance always leads the chain; the backoff's own chain,
        # if there is one, follows it.
        downstream = [] if backoff is None else backoff._taggers
        self._taggers = [self] + downstream

        self.VERBOSE = verbose

        # Repr helper limited to one list item and one dict entry.
        shortener = reprlib.Repr()
        shortener.maxlist = 1
        shortener.maxdict = 1
        self.repr = shortener
示例#10
0
    def __init__(self, *args, **kwargs):
        """Build a trigram -> bigram -> COCA -> names -> WordNet chain.

        Training sentences are unpickled from
        ``../data/brown_clawstags.pickle`` (resolved relative to this
        module's directory). The resulting chain of taggers replaces this
        instance's own ``_taggers`` list.

        :param args: Positional arguments forwarded to
            ``SequentialBackoffTagger.__init__``.
        :param kwargs: Keyword arguments forwarded to
            ``SequentialBackoffTagger.__init__``.
        """
        SequentialBackoffTagger.__init__(self, *args, **kwargs)

        self.dist = FreqDist()

        tagged_brown_path = os.path.join(os.path.dirname(__file__),
                                         '../data/brown_clawstags.pickle')
        # Close the pickle file deterministically instead of leaking it.
        # NOTE: pickle must only be used on trusted files; this one is
        # expected to ship with the package.
        with open(tagged_brown_path, 'rb') as pickle_file:
            train_sents = pickle.load(pickle_file)

        # Drop any token that is not a 2-element pair.
        train_sents = [[t for t in sentence if len(t) == 2]
                       for sentence in train_sents]

        # WordNetTagger ends the chain here: it is built without a backoff
        # (a DefaultTagger('nn') fallback is intentionally not used).
        wn_tagger = WordNetTagger()
        names_tagger = NamesTagger(wn_tagger)
        coca_tagger = COCATagger(names_tagger)
        bigram_tagger = BigramTagger(train_sents, backoff=coca_tagger)
        trigram_tagger = TrigramTagger(train_sents, backoff=bigram_tagger)

        # doesn't include self cause it's a dumb tagger (would always return None)
        self._taggers = trigram_tagger._taggers
示例#11
0
 def __init__(self, train=None):
     """Create the pymorphy2 analyzer and initialize the base tagger.

     :param train: Accepted but not used by this initializer.
     """
     # Analyzer instance kept on the object as ``_morph``.
     self._morph = pymorphy2.MorphAnalyzer()
     # ``None`` is passed to the base initializer, i.e. no backoff tagger.
     SequentialBackoffTagger.__init__(self, None)
示例#12
0
 def __init__(self, *args, **kwargs):
     """Forward all arguments unchanged to ``SequentialBackoffTagger.__init__``.

     :param args: Positional arguments for the base initializer.
     :param kwargs: Keyword arguments for the base initializer.
     """
     SequentialBackoffTagger.__init__(self, *args, **kwargs)
示例#13
0
 def __init__(self, train=None):
     """Create the pymorphy2 analyzer and initialize the base tagger.

     :param train: Accepted but not used by this initializer.
     """
     # Analyzer instance kept on the object as ``_morph``.
     self._morph = pymorphy2.MorphAnalyzer()
     # ``None`` is passed to the base initializer, i.e. no backoff tagger.
     SequentialBackoffTagger.__init__(self, None)