def load_tagger(model_path):
    """Loads a tagger from a CRFsuite binary model file.

    :param str model_path: path to the binary model file.
    """
    tagger = Tagger()
    tagger.open(model_path)
    return tagger
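# A minimal usage sketch for load_tagger above; the model path and the toy
# feature dicts are hypothetical placeholders, assuming a model trained with
# pycrfsuite and features in its dict-per-token format.
tagger = load_tagger('model.crfsuite')
labels = tagger.tag([{'word.lower()': 'hello'}, {'word.lower()': 'world'}])
tagger.close()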
def _load_tagger(self):
    # In pycrfsuite, you have to save the model first, then load it as a tagger.
    self.model_name = 'model_{}'.format(self.task_obj.unique_id)
    file_path = os.path.join(MODELS_DIR, self.task_type, self.model_name)
    tagger = Tagger()
    try:
        tagger.open(file_path)
    except Exception:
        logging.getLogger(ERROR_LOGGER).error(
            'Failed to load CRF model from the filesystem.',
            exc_info=True,
            extra={'model_name': self.model_name, 'file_path': file_path})
        return None
    self.tagger = tagger
    return self.tagger
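# A sketch of the save-then-load round trip the comment above describes,
# assuming pycrfsuite's Trainer/Tagger API; the toy data and file name are
# hypothetical.
from pycrfsuite import Trainer, Tagger

X_train = [[{'w': 'foo'}, {'w': 'bar'}]]
y_train = [['B', 'I']]

trainer = Trainer(verbose=False)
for xseq, yseq in zip(X_train, y_train):
    trainer.append(xseq, yseq)
trainer.train('example.crfsuite')  # pycrfsuite can only persist the model to disk

tagger = Tagger()
tagger.open('example.crfsuite')    # so tagging requires reopening that file
print(tagger.tag([{'w': 'foo'}]))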
def gen(corpus=test, model='m.model', indir=INDIR, outdir=''):
    tagger = Tagger()
    tagger.open(model)
    for doc in corpus.documents:
        path = setup_newdir(doc.filepath, olddir=indir, newdir=outdir,
                            suffix='--', renew=True)
        if not path:
            continue
        mkparentdirs(path)
        task = etree.Element(TASK_ROOT)
        tags = etree.Element(TAGS_ROOT)
        tokens = etree.Element(TOKENS_ROOT)
        task.append(tags)
        task.append(tokens)
        sents = doc.sentences
        seqs = doc.sequence_list()
        tagged_seqs = [tagger.tag(seq) for seq in seqs]
        freq_dict = defaultdict(int)
        for (sent, seq, tagged_seq) in zip(sents, seqs, tagged_seqs):
            s = etree.Element('s')
            for (lex, feat, label) in zip(sent.getchildren(), seq, tagged_seq):
                lex_tag = etree.Element(lex.tag, lex.attrib)
                lex_tag.text = lex.text
                s.append(lex_tag)
                if label != 'None':
                    iso_tag = etree.Element(label)
                    if label in attribs:
                        for key in attribs[label]:
                            iso_tag.attrib[key] = attribs[label][key]
                    iso_tag.attrib['text'] = lex.text
                    iso_tag.attrib['id'] = ids[label] + str(freq_dict[label])
                    lex_tag.attrib['id'] = iso_tag.attrib['id']
                    freq_dict[label] += 1
                    tags.append(iso_tag)
            tokens.append(s)
        xml = etree.tostring(task, pretty_print=True, encoding='unicode')
        with open(path, 'w') as f:
            print(HEADER, file=f)
            print(xml, file=f)
def test_open_close_labels(model_filename, yseq):
    tagger = Tagger()

    with pytest.raises(ValueError):
        # tagger should be closed, so labels() method should fail here
        labels = tagger.labels()

    with tagger.open(model_filename):
        labels = tagger.labels()
    assert set(labels) == set(yseq)

    with pytest.raises(ValueError):
        # tagger should be closed, so labels() method should fail here
        labels = tagger.labels()
class PassageTagger(object):
    def __init__(self, do_train=False, trained_model_name="passage_crf_model",
                 algorithm="crf"):
        self.trained_model_name = trained_model_name
        self.fp = FeatureProcessing()
        self.do_train = do_train
        self.algorithm = algorithm
        if algorithm == "crf":
            if do_train:
                self.trainer = Trainer()
            else:
                self.tagger = Tagger()
        else:
            if do_train:
                model = ChainCRF()
                self.trainer = FrankWolfeSSVM(model=model)
                self.feat_index = {}
                self.label_index = {}
            else:
                self.tagger = pickle.load(open(self.trained_model_name, "rb"))
                self.feat_index = pickle.load(open("ssvm_feat_index.pkl", "rb"))
                label_index = pickle.load(open("ssvm_label_index.pkl", "rb"))
                self.rev_label_index = {i: x for x, i in label_index.items()}

    def read_input(self, filename):
        str_seqs = []
        str_seq = []
        feat_seqs = []
        feat_seq = []
        label_seqs = []
        label_seq = []
        for line in codecs.open(filename, "r", "utf-8"):
            lnstrp = line.strip()
            if lnstrp == "":
                if len(str_seq) != 0:
                    str_seqs.append(str_seq)
                    str_seq = []
                    feat_seqs.append(feat_seq)
                    feat_seq = []
                    label_seqs.append(label_seq)
                    label_seq = []
            else:
                if self.do_train:
                    clause, label = lnstrp.split("\t")
                    label_seq.append(label)
                else:
                    clause = lnstrp
                str_seq.append(clause)
                feats = self.fp.get_features(clause)
                feat_dict = {}
                for f in feats:
                    if f in feat_dict:
                        feat_dict[f] += 1
                    else:
                        feat_dict[f] = 1
                # feat_dict = {i: v for i, v in enumerate(feats)}
                feat_seq.append(feat_dict)
        if len(str_seq) != 0:
            str_seqs.append(str_seq)
            str_seq = []
            feat_seqs.append(feat_seq)
            feat_seq = []
            label_seqs.append(label_seq)
            label_seq = []
        return str_seqs, feat_seqs, label_seqs

    def predict(self, feat_seqs):
        print("Tagging %d sequences" % len(feat_seqs), file=sys.stderr)
        if self.algorithm == "crf":
            self.tagger.open(self.trained_model_name)
            preds = [self.tagger.tag(ItemSequence(feat_seq))
                     for feat_seq in feat_seqs]
        else:
            Xs = []
            for fs in feat_seqs:
                X = []
                for feat_dict in fs:
                    x = [0] * len(self.feat_index)
                    for f in feat_dict:
                        if f in self.feat_index:
                            x[self.feat_index[f]] = feat_dict[f]
                    X.append(x)
                Xs.append(numpy.asarray(X))
            pred_ind_seqs = self.tagger.predict(Xs)
            preds = []
            for ps in pred_ind_seqs:
                pred = []
                for pred_ind in ps:
                    pred.append(self.rev_label_index[pred_ind])
                preds.append(pred)
        return preds

    def train(self, feat_seqs, label_seqs):
        print("Training on %d sequences" % len(feat_seqs), file=sys.stderr)
        if self.algorithm == "crf":
            for feat_seq, label_seq in zip(feat_seqs, label_seqs):
                self.trainer.append(ItemSequence(feat_seq), label_seq)
            self.trainer.train(self.trained_model_name)
        else:
            for fs in feat_seqs:
                for feat_dict in fs:
                    for f in feat_dict:
                        if f not in self.feat_index:
                            self.feat_index[f] = len(self.feat_index)
            Xs = []
            for fs in feat_seqs:
                X = []
                for feat_dict in fs:
                    x = [0] * len(self.feat_index)
                    for f in feat_dict:
                        x[self.feat_index[f]] = feat_dict[f]
                    X.append(x)
                Xs.append(numpy.asarray(X))
            for ls in label_seqs:
                for label in ls:
                    if label not in self.label_index:
                        self.label_index[label] = len(self.label_index)
            Ys = []
            for ls in label_seqs:
                Y = []
                for label in ls:
                    Y.append(self.label_index[label])
                Ys.append(numpy.asarray(Y))
            self.trainer.fit(Xs, Ys)
            pickle.dump(self.trainer, open(self.trained_model_name, "wb"))
            pickle.dump(self.feat_index, open("ssvm_feat_index.pkl", "wb"))
            pickle.dump(self.label_index, open("ssvm_label_index.pkl", "wb"))
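# A hypothetical end-to-end run of PassageTagger above, assuming a training
# file of "clause<TAB>label" lines with blank lines between sequences, which
# is the format read_input expects; the file names are placeholders.
pt = PassageTagger(do_train=True, trained_model_name="passage_crf_model")
_, feat_seqs, label_seqs = pt.read_input("train.tsv")
pt.train(feat_seqs, label_seqs)

pt = PassageTagger(do_train=False, trained_model_name="passage_crf_model")
str_seqs, feat_seqs, _ = pt.read_input("test.tsv")
preds = pt.predict(feat_seqs)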
def test_open_invalid_with_correct_signature(tmpdir):
    tmp = tmpdir.join('tmp.txt')
    tmp.write(b"lCRFfoo" * 100)
    tagger = Tagger()
    with pytest.raises(ValueError):
        tagger.open(str(tmp))
def test_open_invalid_small(tmpdir):
    tmp = tmpdir.join('tmp.txt')
    tmp.write(b'foo')
    tagger = Tagger()
    with pytest.raises(ValueError):
        tagger.open(str(tmp))
def test_open_invalid():
    tagger = Tagger()
    with pytest.raises(ValueError):
        tagger.open(__file__)
def test_open_non_existing():
    tagger = Tagger()
    with pytest.raises(IOError):
        tagger.open('foo')
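# Taken together, the tests above suggest Tagger.open raises IOError for a
# missing file and ValueError for an unreadable or corrupt one. A minimal
# defensive-loading sketch based on that behaviour ('try_load' and the path
# handling are illustrative, not part of pycrfsuite):
def try_load(model_path):
    tagger = Tagger()
    try:
        tagger.open(model_path)
    except IOError:
        return None  # file does not exist or cannot be read
    except ValueError:
        return None  # file exists but is not a valid CRFsuite model
    return tagger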
def evaluate_model_by_story(model_name, test_samples):
    model = Tagger()
    model.open(model_name)
    story_fps = dict()
    for sample in test_samples:
        model.set(build_model_features(sample, 17, True))
        predicted_labels = model.tag()
        chars = list(sample.sentence)
        predicted_fps = []
        fp = ''
        for index, word in enumerate(predicted_labels):
            if word == 'E' or word == 'S':
                fp += chars[index]
                predicted_fps.append(fp)
                fp = ''
            if word == 'B' or word == 'I':
                fp += chars[index]
        actual_fps = [fp for fp in sample.fps
                      if fp != '' and fp != 'null' and fp in sample.sentence]
        filtered_predicted_fps = predicted_fps
        # for predicted_fp in predicted_fps:
        #     lan_confidence_temp = lmmodel.score(predicted_fp, bos=True, eos=True) / len(predicted_fp)
        #     if len(re.findall('[a-zA-Z0-9+]+', predicted_fp)) > 0:
        #         lan_confidence_temp += 5
        #     if lan_confidence_temp > -2.4:
        #         filtered_predicted_fps.append(predicted_fp)
        if sample.story_id not in story_fps:
            story_fps[sample.story_id] = [set(actual_fps), set(filtered_predicted_fps)]
        else:
            story_fps[sample.story_id][0].update(actual_fps)
            story_fps[sample.story_id][1].update(filtered_predicted_fps)
    # print(len(story_fps))
    global sim_t
    sim_threshold = sim_t
    TP_precision = 0
    TP_recall = 0
    all_actual_fps = 0
    all_predicted_fps = 0
    for story_id, (actual_fps, predicted_fps) in story_fps.items():
        story_precision = 0.0
        story_recall = 0.0
        all_actual_fps += len(actual_fps)
        all_predicted_fps += len(predicted_fps)
        story = samples_dao.read_story_by_story_id(int(story_id))
        data = [story_id,
                story[0] if story is not None else '',
                story[1] if story is not None else '',
                story[2] if story is not None else '',
                story[3] if story is not None else '',
                story[4] if story is not None else '',
                actual_fps, predicted_fps]
        with open('../Archive/date_performance/resultsIterRes_by_story_details.csv', 'a', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(data)
        for predicted_fp in predicted_fps:
            sim = []
            for actual_fp in actual_fps:
                similarity = 1 - distance.nlevenshtein(actual_fp, predicted_fp, method=1)
                # if actual_fp in predicted_fp:
                #     similarity = 1
                sim.append(similarity)
            if len(sim) == 0:
                sim = [0]
            if max(sim) >= sim_threshold:
                TP_precision += 1
                story_precision += 1
        for actual_fp in actual_fps:
            sim = []
            for predicted_fp in predicted_fps:
                similarity = 1 - distance.nlevenshtein(actual_fp, predicted_fp, method=1)
                sim.append(similarity)
            if len(sim) == 0:
                sim = [0]
            if max(sim) >= sim_threshold:
                TP_recall += 1
                story_recall += 1
        # per-story details
        story_precision = 0 if len(predicted_fps) == 0 else story_precision / len(predicted_fps)
        story_recall = 0 if len(actual_fps) == 0 else story_recall / len(actual_fps)
        data = ["STORY " + story_id, story_precision, story_recall]
        with open('../Archive/date_performance/results/story_details.csv', 'a', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(data)
    with open('../Archive/date_performance/results/story_details.csv', 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["THE END!!!"])
    # overall results
    precision = TP_precision / all_predicted_fps
    recall = TP_recall / all_actual_fps
    f1 = 2 * precision * recall / (precision + recall)
    print("By Story: Iteration: %s\n\tPrecision: %f\n\tRecall: %f\n\tF1: %f\n\n\n"
          % (model_name.split('_')[2], precision, recall, f1))
    data = ["BY STORY: Iteration " + model_name.split('_')[2], precision, recall, f1]
    with open('../Archive/date_performance/results/IterRes_by_story.csv', 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(data)
    return precision, recall, f1
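# The label scheme decoded inline above appears to be BIES over characters:
# 'B' begins a span, 'I' continues it, 'E' ends it, 'S' marks a single-character
# span, and anything else is outside. A standalone sketch of that decoding
# (assuming well-formed label sequences; 'decode_bies' is illustrative):
def decode_bies(chars, labels):
    spans, current = [], ''
    for ch, label in zip(chars, labels):
        if label in ('B', 'I'):
            current += ch
        elif label in ('E', 'S'):
            current += ch
            spans.append(current)
            current = ''
    return spans

# e.g. decode_bies(list('abcd'), ['B', 'E', 'S', 'N']) -> ['ab', 'c']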
def evaluate_model(model_name, test_samples):
    """Evaluate the model trained in the final iteration and write out the
    test results.

    :param test_samples:
    :param model_name:
    :return:
    """
    model = Tagger()
    model.open(model_name)
    accuracy = 0.0
    recall = 0.0
    f1 = 0.0
    # sample_accuracy = 0.0
    for sample in test_samples:
        model.set(build_model_features(sample, 17, True))
        predicted_labels = model.tag()
        true_labels = sample.char_label
        predicted_label_index = []
        for predicted_label in predicted_labels:
            if predicted_label == 'N':
                predicted_label_index.append(0)
            else:
                predicted_label_index.append(1)
        true_label_index = []
        for true_label in true_labels:
            if true_label == 'N':
                true_label_index.append(0)
            else:
                true_label_index.append(1)
        iteration_test_details = []
        chars = list(sample.sentence)
        # sen_words = sample.sen_words
        iteration_test_details.append(sample.sentence)
        predicted_fps = ''
        actual_fps = ''
        for index, word in enumerate(predicted_labels):
            if word != 'N':
                predicted_fps += chars[index]
        if len(predicted_fps) == 0:
            predicted_fps = '-----'
        for index, word in enumerate(true_labels):
            if word != 'N':
                actual_fps += chars[index]
        iteration_test_details.append(actual_fps)
        iteration_test_details.append(predicted_fps)
        with open('../Archive/date_performance/results/Iteration_Test_Details.csv', 'a', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(iteration_test_details)
        # print(sample.sen_words)
        # print(predicted_labels)
        # print(true_labels)
        sample_acc = metrics.accuracy_score(true_label_index, predicted_label_index)
        sample_rec = metrics.recall_score(true_label_index, predicted_label_index,
                                          average='binary', pos_label=1)
        accuracy += sample_acc
        recall += sample_rec
        if sample_acc + sample_rec > 0:
            f1 += 2 * sample_acc * sample_rec / (sample_acc + sample_rec)
        # sample_accuracy += metrics.sequence_accuracy_score(true_labels, predicted_labels)
    print("Iteration: %s\n\tAccuracy: %f\n\tRecall: %f\n\tF1: %f\n\n\n" % (
        model_name.split('_')[2],
        accuracy / len(test_samples),
        recall / len(test_samples),
        f1 / len(test_samples)))
    data = ["Iteration " + model_name.split('_')[2],
            accuracy / len(test_samples),
            recall / len(test_samples),
            f1 / len(test_samples)]
    with open('../Archive/date_performance/results/IterRes.csv', 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(data)
    with open('../Archive/date_performance/results/Iteration_Test_Details.csv', 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(data)
    return accuracy / len(test_samples), recall / len(test_samples), f1 / len(test_samples)
def __init__(self, tagger=None):
    if not tagger:
        tagger = Tagger()
        tagger.open(TOKENIZATION_MODEL_PATH)
    self.tagger = tagger
class CRFSTagger:

    def __init__(self, cfg=None, mp=None, fnx=None, win_fnx=None, cols=None,
                 verbose=False):
        """Creates an instance of CRFSTagger.

        :param cfg: configuration
        :type cfg: ConfigParser.ConfigParser
        :param mp: model path
        :type mp: str
        """
        # configuration
        self.cfg = None

        # feature template
        self.ft_tmpl = None

        # list of resources used by features, e.g. word clusters, embeddings
        self.resources = None

        # data
        self.train_data = None
        self.test_data = None

        # instance of pycrfsuite.Tagger
        self.tagger = None

        self.verbose = verbose

        # attempt to import canonical replacements
        try:
            import canonical
            self.canonical = canonical.REPLACEMENTS
        except ImportError:
            self.canonical = None

        self.fnx = fnx
        self.win_fnx = win_fnx
        self.ft_tmpl_cols = cols

        # load data and resources if configuration is provided
        if cfg:
            self.cfg = cfg
            expandpaths(self.cfg)
            # loading resources (clusters, embeddings, etc.)
            self._load_resources()
            # loading data
            self._load_data()
        # load model
        elif mp:
            m = pickle.load(open(mp, 'rb'))
            self.cfg = m.cfg
            self.cfg.set('tagger', 'model', mp)
            self.resources = m.resources
            self.fnx = ([self._load_function(n, f) for n, f in m.fnx.items()]
                        if m.fnx else None)
            self.win_fnx = ([self._load_function(n, f) for n, f in m.win_fnx]
                            if m.win_fnx else None)
            self.ft_tmpl_cols = m.cols
        else:
            raise RuntimeError(
                'Configuration initialisation failed. Please, provide either '
                'a configuration or a model.')

        # parsing feature template
        self.ft_tmpl = FeatureTemplate(fnx=self.fnx, win_fnx=self.win_fnx,
                                       cols=self.ft_tmpl_cols)
        self.ft_tmpl.parse_ftvec_templ(self.cfg_tag.get('ftvec'), self.resources)

    @property
    def cfg_tag(self):
        """Configuration parameters of this tagger. Returns a section from a
        ConfigParser object.

        :return: tagger configuration
        :rtype: dict
        """
        return dict(self.cfg.items('tagger'))

    @property
    def cfg_crf(self):
        """Configuration parameters for CRFSuite. These are passed to the
        tagger instance when training is done. Note, these are not necessarily
        the same as the ones in self.tagger.params.

        :return: CRFSuite configuration
        :rtype: dict
        """
        return dict(self.cfg.items('crfsuite'))

    @property
    def cfg_res(self):
        """Resources configuration. Essentially a list of name and file path
        pairs.

        :return: list of resources
        :rtype: dict
        """
        return dict(self.cfg.items('resources'))

    ###########################################################################
    ### A group of properties mapped to configuration values of the tagger. ###
    ###########################################################################

    @property
    def ts(self):
        tss = {'\\t': '\t', '\\s': ' '}
        return tss.get(self.cfg_tag['tab_sep'], self.cfg_tag['tab_sep'])

    @property
    def cols(self):
        return self.cfg_tag['cols']

    @property
    def form_col(self):
        return self.cfg_tag.get('form_col', 'form')

    @property
    def lbl_col(self):
        return self.cfg_tag['label_col']

    @property
    def ilbl_col(self):
        return self.cfg_tag.get('guess_label_col', 'guesstag')

    @property
    def model_path(self):
        return self.cfg_tag['model']

    @property
    def eval_func(self):
        return getattr(eval, '%s' % self.cfg_tag['eval_func'])

    @property
    def info(self):
        return self.tagger.info if self.tagger else None

    def _load_resources(self):
        """Loads resources listed in the `resources` section of the
        configuration. Resources are generally needed for feature generation.
        However, note that for a resource to be loaded a `reader` method is
        needed. For example, to load a clusters resource `cls`, there needs to
        be a method called `read_cls` in `readers.py` that takes a file path
        parameter and returns a resource data structure.
        """
        self.resources = {}
        for n, p in self.cfg_res.items():
            self.resources[n] = getattr(readers, 'read_%s' % n)(p)

    def _load_data(self):
        """Loads training and testing data if provided in the initial
        configuration.
        """
        if 'train' in self.cfg_tag and self.cfg_tag['train']:
            self.train_data = parse_tsv(self.cfg_tag['train'], cols=self.cols,
                                        ts=self.ts)
        if 'test' in self.cfg_tag and self.cfg_tag['test']:
            self.test_data = parse_tsv(fp=self.cfg_tag['test'], cols=self.cols,
                                       ts=self.ts)

    def _load_function(self, name, code_string):
        code = marshal.loads(code_string)
        return types.FunctionType(code, globals(), name)

    def _extract_features(self, doc, form_col='form'):
        """A generator method that extracts features from the data using a
        feature set template. Yields the feature vector of each sequence in
        the data.

        :param doc: data
        :type doc: np.recarray
        """
        d = copy.deepcopy(doc)

        # replace tokens with canonical forms
        if self.canonical:
            for t in d:
                for r in self.canonical.keys():
                    if re.match(r, t['form']):
                        t['form'] = self.canonical[r]

        # number of features
        nft = len(self.ft_tmpl.vec)

        # record count
        rc = len(d)

        # recarray data types (one <=60-char string, nft <=30-char strings)
        dt = 'a60,{}'.format(','.join('a30' for _ in range(nft)))

        # constructing empty recarray
        fts = np.zeros(rc, dtype=dt)

        # sequence start and end indices
        s, e = 0, 0

        sc = 0

        # extracting feature sequences, sequence by sequence
        while 0 <= s < len(d):
            # index of the end of a sequence is recorded at the beginning
            e = d[s]['eos']
            # slicing a sequence
            seq = d[s:e]
            ft_seq = np.zeros(len(seq), dtype=dt)
            # extracting the features
            for i in range(len(seq)):
                ft_seq[i] = tuple(
                    self.ft_tmpl.make_fts(seq, i, form_col=form_col))
            # moving the start index
            s = e
            sc += 1
            # yielding a feature sequence
            yield ft_seq

    def train(self, data=None, form_col=None, lbl_col=None, ilbl_col=None,
              data_cols=None, data_sep=None, dump=True):
        """Trains a model based on provided data and features. The default
        behaviour is to load training parameters from the global
        configuration, unless they are passed to this method.

        IMPORTANT: there are two ways to pass data directly through the `data`
        parameter:

        -- np.recarray: `data` needs to be a recarray with column names that
           match what the feature extractor expects.
        -- csv str: `data` needs to contain a TSV/CSV formatted string. Column
           names and separator should be provided in the `data_cols` and
           `data_sep` parameters. They should still match what is expected by
           the feature extractor.

        The observation, label, and inference column names can be set through
        the global configuration using the following parameter names:
        `form_col`, `label_col`, `guess_label_col`. The default observation
        column name is `fc`, and the inference column name is `guesstag`. All
        three names can be passed to this method to override the global
        configuration. Any other column names need to match their respective
        feature extractor functions, e.g. part-of-speech tags need to be
        placed in the `postag` column. See `ftex.FeatureTemplate` for others.

        RECOMMENDED: use `utils.parse_tsv` to parse input data to avoid column
        configuration errors.

        NOTE: Due to the way `pycrfsuite` works, the crfsuite model needs to
        be dumped on the hard drive; however, the CRFSuiteTagger model does
        not NEED to be dumped.
        That process is controlled through the `dump` parameter.

        :param data: training data
        :type data: np.recarray or str
        :param form_col: fc column name
        :type form_col: str
        :param lbl_col: label column name
        :type lbl_col: str
        :param ilbl_col: inference label column name
        :type ilbl_col: str
        :param data_cols: list of columns in the data
        :type data_cols: str
        :param data_sep: data tab separator
        :type data_sep: str
        :param dump: dumps the model at the specified location if True
        :type dump: bool
        """
        # overriding parameters
        fc = form_col if form_col else self.form_col
        c = data_cols if data_cols else self.cols
        sep = data_sep if data_sep else self.ts
        lc = lbl_col if lbl_col else self.lbl_col
        ilc = ilbl_col if ilbl_col else self.ilbl_col

        if type(data) in [np.core.records.recarray, np.ndarray]:
            d = data
        elif type(data) == str:
            d = parse_tsv(s=data, cols=c, ts=sep, inference_col=ilc)
        elif data is None:
            d = self.train_data
        else:
            raise ValueError('Invalid input type.')

        # extract features
        X = self._extract_features(d, fc)

        # extract labels
        y = gsequences(d, [lc])

        trainer = Trainer(verbose=self.verbose)

        # setting CRFSuite parameters
        trainer.set_params(self.cfg_crf)

        for x_seq, y_seq in zip(X, y):
            trainer.append(x_seq, [l[0] for l in y_seq])

        crfs_mp = '%s.crfs' % self.model_path
        try:
            makedirs(dirname(crfs_mp))
        except OSError:
            pass
        trainer.train(crfs_mp)

        self.tagger = Tagger()
        self.tagger.open(crfs_mp)

        # dumps the model
        if dump:
            self.dump_model(self.model_path)
            pickle.dump(self.cfg, open('%s.cfg.pcl' % self.model_path, 'wb'))

    def tag(self, data, form_col=None, ilbl_col=None, tagger=None, cols=None,
            ts=None):
        """Tags TSV/CSV or np.recarray data using the loaded CRFSuite model.

        See the documentation for `train` for more details on requirements
        for the data passed to this method.

        :param data: data
        :type data: str or recarray
        :param form_col: form column name
        :type form_col: str
        :param ilbl_col: inference label column name
        :type ilbl_col: str
        :param tagger: CRFS tagger
        :type tagger: Tagger
        :param cols: TSV column names
        :type cols: str or list of str
        :param ts: tab separator for TSV
        :type ts: str
        :return: tagged data
        :rtype: recarray
        """
        fc = form_col if form_col else self.form_col
        c = cols if cols else self.cols
        sep = ts if ts else self.ts
        ilc = ilbl_col if ilbl_col else self.ilbl_col

        if type(data) in [np.core.records.recarray, np.ndarray]:
            d = data
        elif type(data) == str:
            d = parse_tsv(s=data, cols=c, ts=sep)
        else:
            raise ValueError('Invalid input type.')

        tgr = tagger
        if tgr is None and self.tagger:
            tgr = self.tagger
        elif tgr is None:
            tgr = Tagger()
            tgr.open('%s.crfs' % self.model_path)

        # extracting features
        X = self._extract_features(d, form_col=fc)

        # tagging sentences
        idx = 0
        for fts in X:
            for l in tgr.tag(fts):
                d[idx][ilc] = l
                idx += 1
        return d

    def test(self, data=None, form_col=None, ilbl_col=None, tagger=None,
             cols=None, ts=None, eval_func=None):
        """Tags TSV/CSV or np.recarray data using the loaded CRFSuite model
        and evaluates the results.

        See the documentation for `train` for more details on requirements
        for the data passed to this method.
        :param data: data
        :type data: str or recarray
        :param form_col: form column name
        :type form_col: str
        :param ilbl_col: inference label column name
        :type ilbl_col: str
        :param tagger: CRFS tagger
        :type tagger: Tagger
        :param cols: TSV column names
        :type cols: str or list of str
        :param ts: tab separator for TSV
        :type ts: str
        :param eval_func: evaluation function name [pos, conll, bio]
        :type eval_func: str
        :return: results and tagged data pair
        :rtype: AccuracyResults, np.recarray
        """
        # use provided data or testing data from config file
        d = self.test_data if data is None else data

        # setting inference label column name
        ilc = self.ilbl_col if ilbl_col is None else ilbl_col

        # tagging
        d = self.tag(d, form_col=form_col, ilbl_col=ilbl_col, tagger=tagger,
                     cols=cols, ts=ts)

        # evaluating
        f = eval_func if eval_func else self.eval_func
        r = f(d, label_col=self.lbl_col, inference_col=self.ilbl_col)

        # returning AccuracyResults and np.recarray tagged data
        return r, d

    def dump_model(self, fp):
        """Dumps the CRFSuiteTagger model at the provided file path `fp`.

        The dump consists of two files: <fp> and <fp>.crfs. The first contains
        the configuration and all feature extraction resources needed by a
        CRFSuiteTagger object to replicate this one. The second is the
        pycrfsuite model, which needs to be dumped separately as it is always
        read from the file system.

        :param fp: model file path
        :type fp: str
        """
        md = Model()
        md.cfg = clean_cfg(self.cfg)
        md.resources = self.resources
        md.fnx = {f.__name__: marshal.dumps(f.__code__)
                  for f in self.fnx} if self.fnx else None
        md.win_fnx = {f.__name__: marshal.dumps(f.__code__)
                      for f in self.win_fnx} if self.win_fnx else None
        md.cols = self.ft_tmpl_cols
        fpx = expanduser(fp)
        try:
            makedirs(dirname(fpx))
        except OSError:
            pass
        pickle.dump(md, open(fpx, 'wb'))
        if fpx != self.model_path:
            src = '%s.crfs' % self.model_path
            trg = '%s.crfs' % fpx
            try:
                makedirs(dirname(trg))
            except OSError:
                pass
            shutil.copy(src, trg)
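# A hypothetical driver for CRFSTagger above, assuming an INI-style
# configuration with the [tagger], [crfsuite], and [resources] sections the
# class reads, and that it provides train/test data paths; 'tagger.cfg' is a
# placeholder file name.
import configparser

cfg = configparser.ConfigParser()
cfg.read('tagger.cfg')

t = CRFSTagger(cfg=cfg)
t.train()                   # trains and writes <model>.crfs next to the model path
results, tagged = t.test()  # tags the configured test data and evaluates it
print(results)

# Alternatively, reload a previously dumped model:
t2 = CRFSTagger(mp=t.model_path)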
def test(features):
    print("Testing..")
    tagger = Tagger()
    tagger.open('crf.model')
    y_pred = [tagger.tag(xseq) for xseq in features]
    return y_pred
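# A small follow-up sketch: scoring the nested predictions returned by test()
# above against gold labels at the token level; 'token_accuracy' is
# illustrative, and y_true is assumed to have the same nested shape as y_pred.
def token_accuracy(y_true, y_pred):
    pairs = [(t, p) for ts, ps in zip(y_true, y_pred) for t, p in zip(ts, ps)]
    correct = sum(1 for t, p in pairs if t == p)
    return correct / len(pairs) if pairs else 0.0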