def _decode_proc(model, in_queue, out_queue):
    """Consume decoding jobs from ``in_queue`` until a ``None`` sentinel arrives.

    Each job is an ``(idx, features)`` pair. The features are decoded with
    ``_inf.decodeViterbi_fast`` against ``model`` and the resulting tags are
    published on ``out_queue`` as ``(idx, tags)`` so the consumer can match
    results back to their jobs.

    Returns ``None`` once the sentinel has been received.
    """
    while True:
        job = in_queue.get()
        if job is None:
            # Sentinel: the producer has no more work for this worker.
            break

        position, feats = job
        _score, tag_seq = _inf.decodeViterbi_fast(feats, model)
        out_queue.put((position, tag_seq))
def _cut(self, text):
    """Segment ``text`` directly and return its words as a list of strings.

    The text is normalized with ``self.feature_extractor.normalize_text``,
    node feature indices are collected for every normalized position, and the
    tag sequence is decoded with ``_inf.decodeViterbi_fast`` against
    ``self.model``.  Words are then assembled from the characters of the
    *original* ``text``: a tag whose label in ``self.idx_to_tag`` contains
    ``"B"`` opens a new word, any other tag extends the current one.  The tag
    of the very first character is never inspected; it always opens a word.

    Empty input returns ``[]`` without running feature extraction or decoding.
    """
    if not text:
        # With no characters, zip(tags, text) below could only ever produce
        # an empty result, so skip the extractor and the decoder entirely.
        return []

    examples = list(self.feature_extractor.normalize_text(text))
    get_node_features_idx = self.feature_extractor.get_node_features_idx
    all_feature = [
        get_node_features_idx(idx, examples) for idx in range(len(examples))
    ]  # type: List[List[int]]

    _, tags = _inf.decodeViterbi_fast(all_feature, self.model)

    # NOTE(review): pairing tags with the raw text assumes normalization
    # yields one item per input character -- confirm against normalize_text.
    words = []
    current_word = None
    for tag, char in zip(tags, text):
        if current_word is None:
            # First character always starts the first word.
            current_word = char
        elif "B" in self.idx_to_tag[tag]:
            words.append(current_word)
            current_word = char
        else:
            current_word += char
    if current_word:
        words.append(current_word)
    return words
def _decode_single(self, testset: DataSet, model: Model):
    """Decode every example of ``testset`` in the current process.

    For each example, ``_inf.decodeViterbi_fast`` is run on its ``features``
    with ``model`` and the decoded tag sequence is stored on the example as
    ``predicted_tags``.  The examples are updated in place; nothing is
    returned.
    """
    for sample in testset:
        # The first element of the decoder's result is discarded; only the
        # tag sequence is kept.
        _score, sample.predicted_tags = _inf.decodeViterbi_fast(
            sample.features, model
        )