def __init__(self, pos, chunk, event, classify, mallet_memory='256m'):
    """Set up the optional taggers, the NER model and the lookup tables.

    Args:
        pos: truthy to create a ``PosTagger``; otherwise ``self.posTagger``
            is ``None``.
        chunk: truthy (together with ``pos``) to create a ``ChunkTagger``.
        event: truthy (together with ``pos``) to create an ``EventTagger``.
        classify: truthy to create the LLDA object via ``GetLLda()``. The
            ``self.dictionaries`` attribute and the contents of
            ``self.entityMap`` are only loaded when it was created.
        mallet_memory: forwarded to ``GetNer`` as its ``memory`` keyword
            (presumably the heap size for the Mallet process -- confirm
            against ``GetNer``).

    The NER model file name is chosen from the enabled taggers:
    ``ner.model`` with POS and chunking, ``ner_nochunk.model`` with POS
    only, and ``ner_nopos_nochunk.model`` otherwise.

    Raises:
        IOError/OSError: if one of the data files under ``BASE_DIR`` cannot
            be opened.
        ValueError: if a line of ``dict-label3`` does not split into exactly
            two space-separated fields.
    """
    self.clear_line_counter()

    # The chunk and event taggers are only created when POS tagging is on.
    self.posTagger = pos_tagger_stdin.PosTagger() if pos else None
    self.chunkTagger = chunk_tagger_stdin.ChunkTagger() if chunk and pos else None
    self.eventTagger = event_tagger_stdin.EventTagger() if event and pos else None
    self.llda = GetLLda() if classify else None

    if pos and chunk:
        self.ner_model = 'ner.model'
    elif pos:
        self.ner_model = 'ner_nochunk.model'
    else:
        self.ner_model = 'ner_nopos_nochunk.model'
    self.ner = GetNer(self.ner_model, memory=mallet_memory)

    self.fe = Features.FeatureExtractor('%s/data/dictionaries' % (BASE_DIR))
    self.capClassifier = cap_classifier.CapClassifier()
    self.vocab = Vocab('%s/hbc/data/vocab' % (BASE_DIR))

    # 1-based line number -> dictionary name.
    self.dictMap = {}
    with open('%s/hbc/data/dictionaries' % (BASE_DIR)) as dict_file:
        for index, line in enumerate(dict_file, 1):
            self.dictMap[index] = line.rstrip('\n')

    # Reverse mapping: dictionary name -> 1-based line number.
    self.dict2index = {}
    for index in self.dictMap.keys():
        self.dict2index[self.dictMap[index]] = index

    if self.llda:
        self.dictionaries = Dictionaries(
            '%s/data/LabeledLDA_dictionaries3' % (BASE_DIR), self.dict2index)

    # Entity string -> 0-based line number; left empty without LLDA.
    self.entityMap = {}
    if self.llda:
        with open('%s/hbc/data/entities' % (BASE_DIR)) as entity_file:
            for index, line in enumerate(entity_file):
                self.entityMap[line.rstrip('\n')] = index

    # Dictionary name -> label, one "<dictionary> <label>" pair per line.
    self.dict2label = {}
    with open('%s/hbc/data/dict-label3' % (BASE_DIR)) as label_file:
        for line in label_file:
            (dictionary, label) = line.rstrip('\n').split(' ')
            self.dict2label[dictionary] = label
def __init__(self):
    """Set up a tagger-free configuration with LLDA and the basic NER model.

    The POS, chunk and event taggers are all set to ``None``, the line
    counter ``self.numberLines`` starts at 0, and NER uses
    ``ner_nopos_nochunk.model``. Lookup tables are read from data files
    under ``BASE_DIR``.

    Raises:
        IOError/OSError: if one of the data files under ``BASE_DIR`` cannot
            be opened.
        ValueError: if a line of ``dict-label3`` does not split into exactly
            two space-separated fields.
    """
    self.numberLines = 0

    self.eventTagger = None
    self.posTagger = None
    self.chunkTagger = None
    self.llda = GetLLda()
    self.ner = GetNer('ner_nopos_nochunk.model')

    self.fe = Features.FeatureExtractor('%s/data/dictionaries' % (BASE_DIR))
    self.capClassifier = cap_classifier.CapClassifier()
    self.vocab = Vocab('%s/hbc/data/vocab' % (BASE_DIR))

    # 1-based line number -> dictionary name.
    self.dictMap = {}
    with open('%s/hbc/data/dictionaries' % (BASE_DIR)) as dict_file:
        for index, line in enumerate(dict_file, 1):
            self.dictMap[index] = line.rstrip('\n')

    # Reverse mapping (name -> line number); only handed to Dictionaries.
    dict2index = {}
    for index in self.dictMap.keys():
        dict2index[self.dictMap[index]] = index

    if self.llda:
        self.dictionaries = Dictionaries(
            '%s/data/LabeledLDA_dictionaries3' % (BASE_DIR), dict2index)

    # Entity string -> 0-based line number; left empty if llda is falsy.
    self.entityMap = {}
    if self.llda:
        with open('%s/hbc/data/entities' % (BASE_DIR)) as entity_file:
            for index, line in enumerate(entity_file):
                self.entityMap[line.rstrip('\n')] = index

    # Dictionary name -> label, one "<dictionary> <label>" pair per line.
    self.dict2label = {}
    with open('%s/hbc/data/dict-label3' % (BASE_DIR)) as label_file:
        for line in label_file:
            (dictionary, label) = line.rstrip('\n').split(' ')
            self.dict2label[dictionary] = label
def displayScores(): for item in computedScores: print(str(item) + " : " + str(computedScores[item])) posTagger = pos_tagger_stdin.PosTagger() chunkTagger = chunk_tagger_stdin.ChunkTagger() eventTagger = event_tagger_stdin.EventTagger() llda = GetLLda() ner_model = 'ner.model' ner = GetNer(ner_model) fe = Features.FeatureExtractor('%s/data/dictionaries' % (BASE_DIR)) capClassifier = cap_classifier.CapClassifier() vocab = Vocab('%s/hbc/data/vocab' % (BASE_DIR)) dictMap = {} i = 1 for line in open('%s/hbc/data/dictionaries' % (BASE_DIR)): dictionary = line.rstrip('\n') dictMap[i] = dictionary i += 1 dict2index = {} for i in dictMap.keys(): dict2index[dictMap[i]] = i if llda: