def testing(args, data, ttns):
    rasengan.warn('NOTE: this function presupposes that the parameters were '
                  'loaded inside the circuit already')
    test_result = args.validate_predictions_f(
        data.test_lex, data.idx2label, args, ttns.test_f_classify,
        data.test_y, data.words_test, fn='/current.test.txt')
    print 'Test F1', test_result['f1']
    return test_result['f1']
def transform_train_features_train_labels(train_features, train_labels):
    f_list = []
    l_list = []
    for f, l in zip(train_features, train_labels):
        for idx in l.nonzero()[0]:
            f_list.append(f)
            l_list.append(idx)
            # NOTE: the break below keeps only the *first* positive
            # label of each multi-label row.
            rasengan.warn("We are breaking")
            break
    f_list = np.array(f_list)
    l_list = np.array(l_list)
    return (f_list, l_list)
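
# A minimal sketch (hypothetical data; `_demo_transform` is not part of
# the repo) of what transform_train_features_train_labels does: each
# multi-label row is reduced to a single (features, first-positive-label)
# training pair.
def _demo_transform():
    import numpy as np
    train_features = [np.array([0.5, 1.0]), np.array([2.0, 0.0])]
    train_labels = [np.array([0, 1, 1]),    # labels 1 and 2 are on
                    np.array([1, 0, 0])]    # label 0 is on
    f, l = transform_train_features_train_labels(
        train_features, train_labels)
    print l                                 # -> [1 0]: first labels only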
def make_conjunctive_feat(feat, feat_name):
    l = []
    n = []
    base_feat = feat.shape[1]
    assert base_feat == len(feat_name)
    if base_feat > 200:
        rasengan.warn('Making Conjunctive feature with %d base features'
                      % base_feat)
    for i in range(base_feat):
        for j in range(i + 1, base_feat):
            # Maximum means OR.
            l.append(feat[:, i].maximum(feat[:, j]))
            # Minimum means AND.
            # l.append(feat[:, i].minimum(feat[:, j]))
            n.append(feat_name[i] + feat_name[j])
    return (scipy.sparse.hstack([feat] + l), feat_name + n)
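
# A small self-contained sketch (`_demo_conjunctive_feat` is
# hypothetical, not from the repo) of why elementwise `maximum` acts as
# OR and `minimum` as AND on binary indicator columns, which is what
# make_conjunctive_feat exploits above.
def _demo_conjunctive_feat():
    import numpy as np
    import scipy.sparse
    feat = scipy.sparse.csc_matrix(
        np.array([[1, 0],
                  [1, 1],
                  [0, 0]], dtype=np.float32))
    col_or = feat[:, 0].maximum(feat[:, 1])   # [1, 1, 0] == OR
    col_and = feat[:, 0].minimum(feat[:, 1])  # [0, 1, 0] == AND
    print col_or.toarray().ravel(), col_and.toarray().ravel()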
def main():
    import transducer_score
    args = transducer_score.args
    set_dropout_to_zero(args)
    data = transducer_score.data
    #--------------------------#
    # Compile disparate models #
    #--------------------------#
    models = []
    for pkl_fn, changes in pkl_to_combine:
        args_clone = rasengan.Namespace(**args)
        #--------------------#
        # Update args_clone. #
        #--------------------#
        rasengan.warn('NOTE: Setting pretrained_param_pklfile')
        args_clone.pretrained_param_pklfile = pkl_fn
        for (k, v) in changes.items():
            setattr(args_clone, k, v)
            print 'Setting args_clone.%s=%s' % (k, str(v))
        #---------------------#
        # Compile args_clone. #
        #---------------------#
        ttns_i = rasengan.Namespace('ttns').update_and_append_prefix(
            compile_args(args_clone), 'test_')
        load_params_from_pklfile_to_stack_config(
            pkl_fn, ttns_i.test_stack_config)
        models.append(ttns_i)
    #-----------------------------#
    # Aggregate disparate models. #
    #-----------------------------#
    ttns = Aggregator(models, data)
    #-----------------------------------------------#
    # Test performance of aggregated decision rule. #
    #-----------------------------------------------#
    with rasengan.debug_support():
        stats_valid = args.validate_predictions_f(
            data.valid_lex, data.idx2label, args, ttns.test_f_classify,
            data.valid_y, data.words_valid, fn='/combined.valid.txt')
        print 'stats_valid', stats_valid
def evaluate_impl(url_mention, TM, E, cat_folds, cat2url,
                  performance_aggregator, DF, cat_idx, cat, folds):
    print >> sys.stderr, \
        'progress = %.2f\r' % (float(cat_idx) / len(cat_folds)),
    for (train_idx, test_idx) in folds:
        for train_set_size in [1]:  # 0.5
            S = get(cat2url[cat],
                    train_idx[:int(len(train_idx) * train_set_size)])
            EmS = minus(E, S)
            Q = get(cat2url[cat], test_idx)
            EmSQ = minus(EmS, Q)
            # --------------------- #
            # Extract Textual Clues #
            # --------------------- #
            clue_obj = TextualClueObject(S, url_mention, TM)
            # ------------------------------------ #
            # Hypothesize Recommendation Criterion #
            # ------------------------------------ #
            rec_obj = NBRecommender(clue_obj, args.ngram_occurrence)
            # ------------------------------- #
            # Update Recommendation Criterion #
            # ------------------------------- #
            updated_rec_obj = FunctionWordRemover(
                rec_obj, df_obj=DF, df_lim=args.df_lim)
            updated_rec_obj.report()
            # ------------------- #
            # Apply The Criterion #
            # ------------------- #
            scores = {}
            rasengan.warn('Restricted Entities to 1000')
            # NOTE: renamed the exception variable so it does not
            # shadow the loop variable `e`.
            for e_idx, e in enumerate(EmSQ[:1000] + Q):
                try:
                    scores[e] = updated_rec_obj(url_mention[e], ename=e)
                except KeyError as err:
                    print >> sys.stderr, err
                    continue
            # ------------------- #
            # Measure Performance #
            # ------------------- #
            performance_aggregator(cat, scores, len(S), Q)
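
# A sketch of the set bookkeeping in evaluate_impl, using plain Python
# sets (`_demo_fold_sets` is hypothetical; the repo's `get`/`minus`
# helpers presumably do the analogous thing over lists): the entities
# that get scored are the distractors (E - S) - Q together with the
# held-out queries Q.
def _demo_fold_sets():
    E = set(['e1', 'e2', 'e3', 'e4', 'e5'])
    S = set(['e1'])            # training seeds for the category
    Q = set(['e3'])            # held-out queries
    EmS = E - S
    EmSQ = EmS - Q             # distractor candidates
    print sorted(EmSQ | Q)     # everything that gets scored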
def train_transducer_lbfgs(train_lex, train_y, args, ttns, training_stats,
                           batch_size=None):
    '''
    This function completes a training epoch by doing one run of LBFGS.
    `ts` abbreviates `train_stack` in the entire function.

    Params
    ------
    train_lex  : A list of input strings (the strings are represented
                 as np arrays).
    train_y    : A list of output strings.
    batch_size : UNUSED (default None).
    '''
    assert args.clipping_value < 0
    assert args.projection_threshold < 0
    ts_param_name = [
        str(e) for e in ttns.train_stack_config.updatable_parameters()]
    print 'The following params will be trained by lbfgs', ts_param_name
    ts_param_shape_list = [ttns.train_stack_config[name].get_value().shape
                           for name in ts_param_name]
    ts_param_shape_map = dict(zip(ts_param_name, ts_param_shape_list))
    total_param = sum(numpy.prod(shape)
                      for shape in ts_param_shape_map.values())

    def set_entries_in_ttns(param_vec):
        '''Set entries in ttns.train_stack_config with corresponding
        values in param_vec.'''
        param_vec = param_vec.astype('float32')
        offset = 0
        for name in ts_param_name:
            shape = ts_param_shape_map[name]
            numel = numpy.prod(shape)
            ttns.train_stack_config[name].set_value(
                param_vec[offset:offset + numel].reshape(shape))
            offset += numel
        return

    def vectorize(param_list, dtype='float32'):
        param_vec = numpy.zeros((total_param,), dtype=dtype)
        offset = 0
        for idx, param in enumerate(param_list):
            shape = param.shape
            assert shape == ts_param_shape_list[idx]
            numel = numpy.prod(shape)
            param_vec[offset:offset + numel] = param.reshape(
                (numel,)).astype(dtype)
            offset += numel
        return param_vec

    def get_entries_in_ttns():
        '''Read the current values of the parameters in
        ttns.train_stack_config and return them as a single flat vector.'''
        return vectorize([ttns.train_stack_config[name].get_value()
                          for name in ts_param_name])

    def loss_over_corpus(param_vec):
        '''Compute the loss value over the entire corpus.'''
        set_entries_in_ttns(param_vec)
        corpus_cost = 0
        for idx in range(len(train_lex)):
            input_string = train_lex[idx]
            output_string = train_y[idx]
            corpus_cost += ttns.train_f_cost(input_string, output_string)
        return corpus_cost / len(train_lex)

    def gradient_over_corpus(param_vec):
        set_entries_in_ttns(param_vec)
        corpus_grad = numpy.zeros((total_param,), dtype='float64')
        for idx in range(len(train_lex)):
            input_string = train_lex[idx]
            output_string = train_y[idx]
            tmp_grad = ttns.train_f_grad(input_string, output_string)
            corpus_grad += vectorize(tmp_grad, 'float64')
        return corpus_grad / len(train_lex)

    with rasengan.tictoc("Training %d epoch" % training_stats['epoch_id']):
        init_param = get_entries_in_ttns()
        rasengan.warn('Skipped FD Check')
        # print 'Check grad output: Error=', scipy.optimize.check_grad(
        #     func=loss_over_corpus, grad=gradient_over_corpus,
        #     x0=init_param)
        opt_param = scipy.optimize.fmin_l_bfgs_b(
            loss_over_corpus, init_param, fprime=gradient_over_corpus,
            disp=2, maxiter=1000)[0]
        set_entries_in_ttns(opt_param)
    return
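
# The sketch below (toy quadratic objective; `_demo_lbfgs_flatten` is
# hypothetical, not part of the repo) illustrates the pattern that
# train_transducer_lbfgs relies on: pack every parameter array into one
# flat vector so scipy.optimize.fmin_l_bfgs_b can treat the whole model
# as a single optimization variable, and unpack on each loss/grad call.
def _demo_lbfgs_flatten():
    import numpy
    import scipy.optimize
    shapes = [(2, 3), (3,)]          # stand-ins for the param shapes
    sizes = [numpy.prod(s) for s in shapes]

    def unflatten(vec):
        out, offset = [], 0
        for shape, numel in zip(shapes, sizes):
            out.append(vec[offset:offset + numel].reshape(shape))
            offset += numel
        return out

    def loss(vec):
        W, b = unflatten(vec)
        return (W ** 2).sum() + ((b - 1.0) ** 2).sum()

    def grad(vec):
        W, b = unflatten(vec)
        return numpy.concatenate([2 * W.ravel(), 2 * (b - 1.0)])

    x0 = numpy.zeros(sum(sizes))
    x_opt = scipy.optimize.fmin_l_bfgs_b(loss, x0, fprime=grad)[0]
    print unflatten(x_opt)[1]        # b converges to ones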
def args_creation_part2(args, data):
    if args.penalty_full_decomp_jason:
        assert args.use_1bl
        assert args.bilstm_stagger_schedule == 'external'
    if (args.partition_dev_into_test and args.partition_dev_into_train):
        rasengan.warn(
            'NOTE: You are pilfering from dev into both train and test')
    #------------------------#
    # Add Topology Arguments #
    #------------------------#
    args.in_dim = (data.vocsize + 2)
    args.wemb1_win_size = args.win
    args.penalty_vocsize = data.vocsize
    args.penalty_mid_col = (args.wemb1_win_size - 1) / 2
    if args.use_0bl:
        bilstm_stack = []
    elif args.use_1bl:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm')]
    elif args.use_1l:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm')]
    elif args.use_6bl:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVI')]
    elif args.use_8bl:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVI'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVIII')]
    elif args.use_4l:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV')]
    else:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV')]
    args.chips = ([(lstm_seqlabel_circuit.Embedding, 'wemb1')]
                  + bilstm_stack
                  + [(transducer_circuit.Penalty, 'penalty')])
    #---------------------------------------------#
    # Learning Rates, Optimizers, Epoch, EndPoint #
    #---------------------------------------------#
    args.optimizer = lstm_seqlabel_optimizer.sgd
    args.perform_training = 1
    args.perform_testing = (0 or args.partition_dev_into_test)
    args.lr = 0.4
    args.lr_drop = 0.9
    args.nepochs = 1000
    args.train_f = lstm_seqlabel_training.train_transducer
    args.validate_predictions_f = (
        lstm_seqlabel_validation.validate_predictions_transducer)
    args.verbose = 2
    args.skip_validation = 0
    INSERTION_LIMIT = 3
    args.endpoint = transducer_wrapper.TransducerWrapper(
        Transducer(data.vocsize, INSERTION_LIMIT),
        sampling_decoding=args.sampling_decoding,
        crunching=args.crunching,
        ryanout=args.ryanout)
    args.endpoint.dont_pickle = 1
    print args.endpoint
    #-------------------------------------------------------#
    # Dropout, Gradient Clipping, L2 Projection for *Wemb1* #
    #-------------------------------------------------------#
    args.wemb1_do_dropout = 1
    args.wemb1_dropout_retention_freq = .8
    args.wemb1_clip_gradient = 1
    args.wemb1_l2_project = 1
    #---------------------------------------------------------#
    # Dropout, Gradient Clipping, L2 Projection for *Penalty* #
    #---------------------------------------------------------#
    rasengan.warn('We DONT DO DROPOUT ON PENALTY !!')
    args.penalty_clip_gradient = 1
    args.penalty_l2_project = 1
    args.penalty_tie_copy_param = 1
    args.penalty_vocsize = data.vocsize
    #-----------------#
    # LSTM parameters #
    #-----------------#
    # Set the forward LSTM of the first LSTM by hand.
    # Forward LSTM
    if args.bilstm_externalandcopyatmax:
        args.bilstm_stagger_schedule = 'external'
        pass
    if args.bilstm_runbilstmseparately:
        args.bilstm_stagger_schedule = 'external'
        pass
    args.bilstm_do_backward_pass = not (args.use_1l or args.use_4l)
    args.bilstm_forward_do_dropout = 1
    args.bilstm_forward_dropout_retention_freq = 0.8
    args.bilstm_forward_clip_gradient = 1
    args.bilstm_forward_l2_project = 1
    args.bilstm_forward_add_bias = 1
    for prop_src, prop_dest in zip(FORWARD_LSTM_PROPERTIES,
                                   BACKWARD_LSTM_PROPERTIES):
        # Copy each backward LSTM property from the forward part.
        args.copy_invariant_is_prefix('bilstm', prop_src, prop_dest)
    #------------------------------------------------------------#
    # Settings for later BiLSTMs : bilstmII, bilstmIII, bilstmIV #
    # These settings are simply copied over.                     #
    # There is no need to remove properties, since properties    #
    # that are not needed would simply not be compiled.          #
    #------------------------------------------------------------#
    for bilstm_height in range(2, len(bilstm_stack) + 1):
        at_top = (bilstm_height == len(bilstm_stack))
        bilstm_height = ARABIC_TO_ROMAN_MAP[bilstm_height]
        if (args.bilstm_externalandcopyatmax
                or args.bilstm_runbilstmseparately):
            if at_top:
                bl_name = ('bilstm%s_forcefully_copy_embedding'
                           '_to_output' % bilstm_height)
                setattr(args, bl_name, 1)
                pass
        if args.bilstm_runbilstmseparately:
            setattr(args,
                    'bilstm%s_segregate_bilstm_inputs' % bilstm_height,
                    args.bilstm_runbilstmseparately)
        setattr(args, 'bilstm%s_stagger_schedule' % bilstm_height,
                args.bilstm_stagger_schedule)
        setattr(args, 'bilstm%s_do_backward_pass' % bilstm_height,
                args.bilstm_do_backward_pass)
        args = util_add_bilstm_prop(args, 'bilstm%s' % bilstm_height)
    #----------------------------------------------#
    # The clipping Value and Projection Threshold. #
    #----------------------------------------------#
    args.clipping_value = 10
    args.projection_threshold = 7
    #------------------------------------------#
    # Settings for blocking updates to layers. #
    #------------------------------------------#
    args.wemb1_block_update = 0
    args.bilstm_forward_block_update = 0
    args.bilstm_backward_block_update = 0
    args.bilstmII_forward_block_update = 0
    args.bilstmII_backward_block_update = 0
    args.penalty_block_update = 0
    #----------------------------#
    # Learning Rate Controllers. #
    #----------------------------#
    args.decay = 0
    args.decay_epochs = 0
    args.minimum_lr = 1e-5
    # The learning rate decay exponent.
    args.lr_decay_exponent = 0
    #-------------------------#
    # Loading Pretrained PKL. #
    #-------------------------#
    rasengan.warn('NOTE: I have set pretrained_param_pklfile to None')
    args.pretrained_param_pklfile = None
    return args
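
# Hypothetical illustration (`_demo_bilstm_arg_names` is not part of
# the repo) of the per-layer attribute names that the copy loop above
# generates, assuming ARABIC_TO_ROMAN_MAP maps 2 -> 'II', 3 -> 'III',
# and so on: each stacked BiLSTM gets its own copy of the shared flags.
def _demo_bilstm_arg_names(n_layers=4):
    roman = {2: 'II', 3: 'III', 4: 'IV'}
    for height in range(2, n_layers + 1):
        for prop in ('stagger_schedule', 'do_backward_pass'):
            print 'bilstm%s_%s' % (roman[height], prop)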
| Last-Updated: Sun May 1 09:07:58 2016 (-0400)
|           By: Pushpendre Rastogi
|     Update #: 64
'''
import util_lstm_seqlabel
import numpy
import time
import rasengan
import lstm_seqlabel_load_save_model
import functools
import codecs
try:
    from dependency_parser.RectangleDependencyParser import DependencyParser
    dp_viterbi_parse = DependencyParser().viterbi_parse
except Exception:
    rasengan.warn(
        "You don't have the dependency parser. "
        "Don't worry if you just want the transducer.")


def get_conlleval_for_task(args):
    if args.task == 'slu':
        from data.atis import conlleval
    elif args.task == 'chunking':
        from data.conll2003_ner import conlleval
    elif args.task == 'ner':
        from data.conll2003_ner import conlleval
    elif args.task == 'postag':
        from data.conll_postag import eval_pwa as conlleval
    else:
        raise NotImplementedError
    return conlleval
def config_overide(msg, args):
    assert ' ' not in msg
    args.folder = args.folder + '_' + msg
    rasengan.warn('NOTE: I set args.folder to ' + args.folder)
    yield
    pass
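
# A hedged usage sketch (`_demo_config_overide` is hypothetical, not
# from the repo): since config_overide is a generator with a single
# bare yield, it composes naturally with contextlib.contextmanager so
# the folder suffix is applied around a block of work. Whether the
# original call sites wrap it this way is an assumption.
import contextlib

def _demo_config_overide(args):
    with contextlib.contextmanager(config_overide)('dry_run', args):
        pass  # training / evaluation would run here with the new folder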