Example #1
def testing(args, data, ttns):
    rasengan.warn('NOTE: this function presupposes that the parameters were '
                  'loaded inside the circuit already')
    test_result = args.validate_predictions_f(data.test_lex,
                                              data.idx2label,
                                              args,
                                              ttns.test_f_classify,
                                              data.test_y,
                                              data.words_test,
                                              fn='/current.test.txt')
    print('Test F1', test_result['f1'])
    return test_result['f1']
Example #2
def transform_train_features_train_labels(train_features, train_labels):
    f_list = []
    l_list = []
    for f, l in zip(train_features, train_labels):
        for idx in l.nonzero()[0]:
            f_list.append(f)
            l_list.append(idx)
            rasengan.warn("We are breaking")
            break
    f_list = np.array(f_list)
    l_list = np.array(l_list)
    return (f_list, l_list)
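The inner `break` means only the first active label of each row is kept, so a multi-label indicator matrix is flattened into one (feature, label) pair per example. A minimal self-contained sketch of that behavior on toy data (the `rasengan.warn` call is dropped and all names here are hypothetical):

import numpy as np

# Toy multi-label data: 3 examples, 4 possible labels each.
features = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
labels = np.array([[0, 1, 1, 0],
                   [1, 0, 0, 0],
                   [0, 0, 0, 1]])

f_list, l_list = [], []
for f, l in zip(features, labels):
    for idx in l.nonzero()[0]:
        f_list.append(f)
        l_list.append(idx)
        break  # keep only the first active label, as above
print(np.array(l_list))  # -> [1 0 3]; any label beyond the first is dropped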
Example #3
def testing(args, data, ttns):
    rasengan.warn('NOTE: this function presupposes that the parameters were '
                  'loaded inside the circuit already')
    test_result = args.validate_predictions_f(
        data.test_lex,
        data.idx2label,
        args,
        ttns.test_f_classify,
        data.test_y,
        data.words_test,
        fn='/current.test.txt')
    print('Test F1', test_result['f1'])
    return test_result['f1']
Example #4
def make_conjunctive_feat(feat, feat_name):
    l = []
    n = []
    base_feat = feat.shape[1]
    assert base_feat == len(feat_name)
    if base_feat > 200:
        rasengan.warn('Making Conjunctive feature with %d base features' %
                      base_feat)
    for i in range(base_feat):
        for j in range(i + 1, base_feat):
            # Maximum means OR
            l.append(feat[:, i].maximum(feat[:, j]))
            # Minimum means AND
            # l.append(feat[:, i].minimum(feat[:, j]))
            n.append(feat_name[i] + feat_name[j])
    return (scipy.sparse.hstack([feat] + l), feat_name + n)
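Since the features are binary, the element-wise `maximum` of two columns is their logical OR (and `minimum` would be AND), so the function appends one OR column per pair of base features. A minimal sketch on a 3x3 toy matrix, assuming string feature names:

import numpy as np
import scipy.sparse

# Toy binary feature matrix: 3 rows, 3 base features named 'a', 'b', 'c'.
feat = scipy.sparse.csc_matrix(np.array([[1, 0, 0],
                                         [0, 1, 0],
                                         [1, 1, 0]], dtype=np.float64))
feat_name = ['a', 'b', 'c']

l, n = [], []
for i in range(feat.shape[1]):
    for j in range(i + 1, feat.shape[1]):
        l.append(feat[:, i].maximum(feat[:, j]))  # element-wise max == OR
        n.append(feat_name[i] + feat_name[j])

conj, names = scipy.sparse.hstack([feat] + l), feat_name + n
print(names)           # -> ['a', 'b', 'c', 'ab', 'ac', 'bc']
print(conj.toarray())  # base columns followed by the three OR columns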
Example #5
def main():
    import transducer_score
    args = transducer_score.args
    set_dropout_to_zero(args)
    data = transducer_score.data
    #--------------------------#
    # Compile disparate models #
    #--------------------------#
    models = []
    for pkl_fn, changes in pkl_to_combine:
        args_clone = rasengan.Namespace(**args)
        #--------------------#
        # Update args_clone. #
        #--------------------#
        rasengan.warn('NOTE: Setting pretrained_param_pklfile')
        args_clone.pretrained_param_pklfile = pkl_fn
        for (k, v) in changes.items():
            setattr(args_clone, k, v)
            print('Setting args_clone.%s=%s' % (k, str(v)))
        #---------------------#
        # Compile args_clone. #
        #---------------------#
        ttns_i = rasengan.Namespace('ttns').update_and_append_prefix(
            compile_args(args_clone), 'test_')
        load_params_from_pklfile_to_stack_config(
            pkl_fn, ttns_i.test_stack_config)
        models.append(ttns_i)

    #----------------------------#
    # Aggregate disparate model. #
    #----------------------------#
    ttns = Aggregator(models, data)
    #-----------------------------------------------#
    # Test performance of Aggregated decision rule. #
    #-----------------------------------------------#
    with rasengan.debug_support():
        stats_valid = args.validate_predictions_f(
            data.valid_lex,
            data.idx2label,
            args,
            ttns.test_f_classify,
            data.valid_y,
            data.words_valid,
            fn='/combined.valid.txt')
        print('stats_valid', stats_valid)
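The `Aggregator` class itself does not appear in the snippet; one plausible decision rule it could implement is per-class score averaging over the compiled models. A minimal sketch under that assumption (`MeanAggregator` and the two lambda "models" are hypothetical):

import numpy as np

class MeanAggregator(object):
    ''' Hypothetical stand-in for `Aggregator` (not shown in this
    snippet): combine several models by averaging the per-class
    scores returned by their classify functions. '''

    def __init__(self, classify_fns):
        self.classify_fns = classify_fns

    def test_f_classify(self, x):
        return np.mean([np.asarray(f(x)) for f in self.classify_fns],
                       axis=0)

model_a = lambda x: [0.2, 0.8]
model_b = lambda x: [0.6, 0.4]
agg = MeanAggregator([model_a, model_b])
print(agg.test_f_classify(None))  # -> [0.4 0.6]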
Example #6
def evaluate_impl(url_mention, TM, E, cat_folds, cat2url,
                  performance_aggregator, DF, cat_idx, cat, folds):
    print('progress = %.2f\r' % (float(cat_idx) / len(cat_folds)),
          end='', file=sys.stderr)
    for (train_idx, test_idx) in folds:
        for train_set_size in [1]:  # 0.5
            S = get(cat2url[cat],
                    train_idx[:int(len(train_idx) * train_set_size)])
            EmS = minus(E, S)
            Q = get(cat2url[cat], test_idx)
            EmSQ = minus(EmS, Q)
            # --------------------- #
            # Extract Textual Clues #
            # --------------------- #
            clue_obj = TextualClueObject(S, url_mention, TM)
            # ------------------------------------ #
            # Hypothesize Recommendation Criterion #
            # ------------------------------------ #
            rec_obj = NBRecommender(clue_obj, args.ngram_occurrence)
            # ------------------------------- #
            # Update Recommendation Criterion #
            # ------------------------------- #
            updated_rec_obj = FunctionWordRemover(rec_obj,
                                                  df_obj=DF,
                                                  df_lim=args.df_lim)
            updated_rec_obj.report()
            # ------------------- #
            # Apply The Criterion #
            # ------------------- #
            scores = {}
            rasengan.warn('Restricted Entities to 1000')
            for e_idx, e in enumerate(EmSQ[:1000] + Q):
                try:
                    scores[e] = updated_rec_obj(url_mention[e], ename=e)
                except KeyError as err:  # renamed to avoid shadowing the entity `e`
                    print(err, file=sys.stderr)
                    continue
            # ------------------- #
            # Measure Performance #
            # ------------------- #
            performance_aggregator(cat, scores, len(S), Q)
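Each fold scores the union of the capped negatives `EmSQ[:1000]` and the held-out queries `Q`, where the names encode set differences (`EmS` is E minus S). A small sketch of that arithmetic, with `get` and `minus` written out as assumptions since the snippet does not define them:

# Sketch of the split arithmetic above; `get` and `minus` do not appear
# in the snippet, so these are assumed implementations.
def get(urls, idx):
    return [urls[i] for i in idx]

def minus(a, b):
    exclude = set(b)
    return [x for x in a if x not in exclude]

E = ['u0', 'u1', 'u2', 'u3', 'u4']  # all entities
S = get(E, [0, 1])                  # training seed set for the category
Q = get(E, [2])                     # held-out test queries
EmS = minus(E, S)                   # E minus S: candidates to rank
EmSQ = minus(EmS, Q)                # E minus S minus Q: the negatives
print(EmSQ + Q)                     # scoring pool: ['u3', 'u4', 'u2']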
Example #7
def train_transducer_lbfgs(train_lex,
                           train_y,
                           args,
                           ttns,
                           training_stats,
                           batch_size=None):
    ''' This function completes a training epoch by doing one run of LBFGS.
    `ts` abbreviates `train_stack` throughout this function.

    Params
    ------
    train_lex      : A list of input_strings (the strings are represented as np arrays)
    train_y        : A list of output strings
    batch_size     : UNUSED : (default None)
    '''
    assert args.clipping_value < 0
    assert args.projection_threshold < 0

    ts_param_name = [
        str(e) for e in ttns.train_stack_config.updatable_parameters()
    ]
    print('The following params will be trained by lbfgs', ts_param_name)
    ts_param_shape_list = [
        ttns.train_stack_config[name].get_value().shape
        for name in ts_param_name
    ]
    ts_param_shape_map = dict(zip(ts_param_name, ts_param_shape_list))

    total_param = sum(
        numpy.prod(shape) for shape in ts_param_shape_map.values())

    def set_entries_in_ttns(param_vec):
        ''' Set entries in ttns.train_stack_config
        with corresponding values in param_vec.
        '''
        param_vec = param_vec.astype('float32')
        offset = 0
        for name in ts_param_name:
            shape = ts_param_shape_map[name]
            numel = numpy.prod(shape)
            ttns.train_stack_config[name].set_value(
                param_vec[offset:offset + numel].reshape(shape))
            offset += numel
            pass
        return

    def vectorize(param_list, dtype='float32'):
        param_vec = numpy.zeros((total_param, ), dtype=dtype)
        offset = 0
        for idx, param in enumerate(param_list):
            shape = param.shape
            assert shape == ts_param_shape_list[idx]
            numel = numpy.prod(shape)
            param_vec[offset:offset + numel] = param.reshape(
                (numel, )).astype(dtype)
            offset += numel
            pass
        return param_vec

    def get_entries_in_ttns():
        ''' Return the current values of the ttns.train_stack_config
        parameters, flattened into a single vector.
        '''
        return vectorize([
            ttns.train_stack_config[name].get_value() for name in ts_param_name
        ])

    def loss_over_corpus(param_vec):
        ''' Compute the loss value over the entire corpus.
        '''
        set_entries_in_ttns(param_vec)
        corpus_cost = 0
        for idx in range(len(train_lex)):
            input_string = train_lex[idx]
            output_string = train_y[idx]
            corpus_cost += ttns.train_f_cost(input_string, output_string)
        return corpus_cost / len(train_lex)

    def gradient_over_corpus(param_vec):
        set_entries_in_ttns(param_vec)
        corpus_grad = numpy.zeros((total_param, ), dtype='float64')
        for idx in range(len(train_lex)):
            input_string = train_lex[idx]
            output_string = train_y[idx]
            tmp_grad = ttns.train_f_grad(input_string, output_string)
            corpus_grad += vectorize(tmp_grad, 'float64')
        return corpus_grad / len(train_lex)

    with rasengan.tictoc("Training %d epoch" % training_stats['epoch_id']):
        init_param = get_entries_in_ttns()
        rasengan.warn('Skipped FD Check')
        # print 'Check grad output: Error=', scipy.optimize.check_grad(func=loss_over_corpus, grad=gradient_over_corpus, x0=init_param)
        opt_param = scipy.optimize.fmin_l_bfgs_b(loss_over_corpus,
                                                 init_param,
                                                 fprime=gradient_over_corpus,
                                                 disp=2,
                                                 maxiter=1000)[0]
        set_entries_in_ttns(opt_param)
    return
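scipy.optimize.fmin_l_bfgs_b optimizes a single flat vector, which is why the function above packs every shared variable into one array (`vectorize`) and writes the values back on each evaluation (`set_entries_in_ttns`). The same round-trip pattern on a self-contained toy least-squares problem:

import numpy
import scipy.optimize

# Toy version of the flatten/optimize/unflatten pattern above:
# minimize ||W - A||^2 + ||b||^2 over a matrix W and a vector b.
A = numpy.arange(6, dtype='float64').reshape(2, 3)
shapes = [(2, 3), (3,)]
total = int(sum(numpy.prod(s) for s in shapes))

def unflatten(vec):
    out, offset = [], 0
    for shape in shapes:
        numel = int(numpy.prod(shape))
        out.append(vec[offset:offset + numel].reshape(shape))
        offset += numel
    return out

def loss(vec):
    W, b = unflatten(vec)
    return numpy.sum((W - A) ** 2) + numpy.sum(b ** 2)

def grad(vec):
    W, b = unflatten(vec)
    return numpy.concatenate([(2 * (W - A)).ravel(), (2 * b).ravel()])

opt = scipy.optimize.fmin_l_bfgs_b(loss, numpy.zeros(total), fprime=grad)[0]
print(numpy.allclose(unflatten(opt)[0], A))  # -> True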
Example #8
def args_creation_part2(args, data):
    if args.penalty_full_decomp_jason:
        assert args.use_1bl
        assert args.bilstm_stagger_schedule == 'external'

    if (args.partition_dev_into_test
        and args.partition_dev_into_train):
        rasengan.warn('NOTE: You are pilfering from dev into both train and test')
    #------------------------#
    # Add Topology Arguments #
    #------------------------#
    args.in_dim = (data.vocsize + 2)
    args.wemb1_win_size = args.win
    args.penalty_vocsize = data.vocsize
    args.penalty_mid_col = (args.wemb1_win_size - 1) // 2
    if args.use_0bl:
        bilstm_stack = []
    elif args.use_1bl:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm')]
    elif args.use_1l:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm'),]
    elif args.use_6bl:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmV'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmVI')]
    elif args.use_8bl:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmV'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmVI'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmVII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmVIII')]
    elif args.use_4l:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV')]
    else:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
            (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV')]
    args.chips = (
        [(lstm_seqlabel_circuit.Embedding, 'wemb1')]
        + bilstm_stack
        + [(transducer_circuit.Penalty, 'penalty')])
    #---------------------------------------------#
    # Learning Rates, Optimizers, Epoch, EndPoint #
    #---------------------------------------------#
    args.optimizer = lstm_seqlabel_optimizer.sgd
    args.perform_training = 1
    args.perform_testing = (0 or args.partition_dev_into_test)
    args.lr = 0.4
    args.lr_drop = 0.9
    args.nepochs = 1000
    args.train_f = lstm_seqlabel_training.train_transducer
    args.validate_predictions_f = (
        lstm_seqlabel_validation.validate_predictions_transducer)
    args.verbose = 2
    args.skip_validation = 0
    INSERTION_LIMIT = 3
    args.endpoint = transducer_wrapper.TransducerWrapper(
        Transducer(data.vocsize, INSERTION_LIMIT),
        sampling_decoding=args.sampling_decoding,
        crunching=args.crunching, ryanout=args.ryanout)
    args.endpoint.dont_pickle = 1
    print(args.endpoint)
    #-------------------------------------------------------#
    # Dropout, Gradient Clipping, L2 Projection for *Wemb1* #
    #-------------------------------------------------------#
    args.wemb1_do_dropout = 1
    args.wemb1_dropout_retention_freq = .8
    args.wemb1_clip_gradient = 1
    args.wemb1_l2_project = 1
    #---------------------------------------------------------#
    # Dropout, Gradient Clipping, L2 Projection for *Penalty* #
    #---------------------------------------------------------#
    rasengan.warn('We DONT DO DROPOUT ON PENALTY !!')
    args.penalty_clip_gradient = 1
    args.penalty_l2_project = 1
    args.penalty_tie_copy_param = 1
    args.penalty_vocsize = data.vocsize
    #-----------------#
    # LSTM parameters #
    #-----------------#
    # Set the forward LSTM of the first LSTM by hand.
    # Forward LSTM
    if args.bilstm_externalandcopyatmax:
        args.bilstm_stagger_schedule = 'external'
        pass

    if args.bilstm_runbilstmseparately:
        args.bilstm_stagger_schedule = 'external'
        pass

    args.bilstm_do_backward_pass = not (args.use_1l or args.use_4l)
    args.bilstm_forward_do_dropout = 1
    args.bilstm_forward_dropout_retention_freq = 0.8
    args.bilstm_forward_clip_gradient = 1
    args.bilstm_forward_l2_project = 1
    args.bilstm_forward_add_bias = 1
    for prop_src, prop_dest in zip(
            FORWARD_LSTM_PROPERTIES, BACKWARD_LSTM_PROPERTIES):
        # Copy Backward LSTM property from the forward part
        args.copy_invariant_is_prefix('bilstm', prop_src, prop_dest)
    #------------------------------------------------------------#
    # Settings for later BiLSTMs : bilstmII, bilstmIII, bilstmIV #
    # These settings are simply copied over.                     #
    # There is no need to remove properties, since properties    #
    # that are not needed would simply not be compiled.          #
    #------------------------------------------------------------#
    for bilstm_height in range(2, len(bilstm_stack)+1):
        at_top = (bilstm_height == len(bilstm_stack))
        bilstm_height = ARABIC_TO_ROMAN_MAP[bilstm_height]
        if args.bilstm_externalandcopyatmax or args.bilstm_runbilstmseparately:
            if at_top:
                bl_name = ('bilstm%s_forcefully_copy_embedding'
                           '_to_output'%bilstm_height)
                setattr(args, bl_name, 1)
            pass
        if args.bilstm_runbilstmseparately:
            setattr(args, 'bilstm%s_segregate_bilstm_inputs'%bilstm_height,
                    args.bilstm_runbilstmseparately)

        setattr(args, 'bilstm%s_stagger_schedule'%bilstm_height,
                args.bilstm_stagger_schedule)
        setattr(args, 'bilstm%s_do_backward_pass'%bilstm_height,
                args.bilstm_do_backward_pass)
        args = util_add_bilstm_prop(args, 'bilstm%s'%bilstm_height)
    #----------------------------------------------#
    # The clipping Value and Projection Threshold. #
    #----------------------------------------------#
    args.clipping_value = 10
    args.projection_threshold = 7
    #------------------------------------------#
    # Settings for blocking updates to layers. #
    #------------------------------------------#
    args.wemb1_block_update = 0
    args.bilstm_forward_block_update = 0
    args.bilstm_backward_block_update = 0
    args.bilstmII_forward_block_update = 0
    args.bilstmII_backward_block_update = 0
    args.penalty_block_update = 0
    #----------------------------#
    # Learning Rate Controllers. #
    #----------------------------#
    args.decay = 0
    args.decay_epochs = 0
    args.minimum_lr = 1e-5
    # The learning rate decay exponent.
    args.lr_decay_exponent = 0
    #-------------------------#
    # Loading Pretrained PKL. #
    #-------------------------#
    rasengan.warn('NOTE: I have set pretrained_param_pklfile to None')
    args.pretrained_param_pklfile = None
    return args
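The loop over `bilstm_height` copies the first layer's settings to the higher layers by building attribute names dynamically. A minimal sketch of that naming scheme (`ARABIC_TO_ROMAN_MAP` is used but not defined in the snippet; here it is assumed to map layer indices to Roman-numeral name suffixes):

from types import SimpleNamespace

# Assumed mapping from layer index to Roman-numeral suffix.
ARABIC_TO_ROMAN_MAP = {2: 'II', 3: 'III', 4: 'IV'}

args = SimpleNamespace(bilstm_stagger_schedule='external')
bilstm_stack = ['bilstm', 'bilstmII', 'bilstmIII', 'bilstmIV']
for height in range(2, len(bilstm_stack) + 1):
    suffix = ARABIC_TO_ROMAN_MAP[height]
    # Propagate the first layer's setting under the per-layer name.
    setattr(args, 'bilstm%s_stagger_schedule' % suffix,
            args.bilstm_stagger_schedule)
print(args.bilstmIV_stagger_schedule)  # -> 'external'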
Example #9
def args_creation_part2(args, data):
    if args.penalty_full_decomp_jason:
        assert args.use_1bl
        assert args.bilstm_stagger_schedule == 'external'

    if (args.partition_dev_into_test and args.partition_dev_into_train):
        rasengan.warn(
            'NOTE: You are pilfering from dev into both train and test')
    #------------------------#
    # Add Topology Arguments #
    #------------------------#
    args.in_dim = (data.vocsize + 2)
    args.wemb1_win_size = args.win
    args.penalty_vocsize = data.vocsize
    args.penalty_mid_col = (args.wemb1_win_size - 1) // 2
    if args.use_0bl:
        bilstm_stack = []
    elif args.use_1bl:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm')]
    elif args.use_1l:
        bilstm_stack = [
            (lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
        ]
    elif args.use_6bl:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVI')]
    elif args.use_8bl:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmV'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVI'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmVIII')]
    elif args.use_4l:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV')]
    else:
        bilstm_stack = [(lstm_seqlabel_circuit.BiLSTM, 'bilstm'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIII'),
                        (lstm_seqlabel_circuit.BiLSTM, 'bilstmIV')]
    args.chips = ([(lstm_seqlabel_circuit.Embedding, 'wemb1')] + bilstm_stack +
                  [(transducer_circuit.Penalty, 'penalty')])
    #---------------------------------------------#
    # Learning Rates, Optimizers, Epoch, EndPoint #
    #---------------------------------------------#
    args.optimizer = lstm_seqlabel_optimizer.sgd
    args.perform_training = 1
    args.perform_testing = (0 or args.partition_dev_into_test)
    args.lr = 0.4
    args.lr_drop = 0.9
    args.nepochs = 1000
    args.train_f = lstm_seqlabel_training.train_transducer
    args.validate_predictions_f = (
        lstm_seqlabel_validation.validate_predictions_transducer)
    args.verbose = 2
    args.skip_validation = 0
    INSERTION_LIMIT = 3
    args.endpoint = transducer_wrapper.TransducerWrapper(
        Transducer(data.vocsize, INSERTION_LIMIT),
        sampling_decoding=args.sampling_decoding,
        crunching=args.crunching)
    args.endpoint.dont_pickle = 1
    print(args.endpoint)
    #-------------------------------------------------------#
    # Dropout, Gradient Clipping, L2 Projection for *Wemb1* #
    #-------------------------------------------------------#
    args.wemb1_do_dropout = 1
    args.wemb1_dropout_retention_freq = .8
    args.wemb1_clip_gradient = 1
    args.wemb1_l2_project = 1
    #---------------------------------------------------------#
    # Dropout, Gradient Clipping, L2 Projection for *Penalty* #
    #---------------------------------------------------------#
    rasengan.warn('We DONT DO DROPOUT ON PENALTY !!')
    args.penalty_clip_gradient = 1
    args.penalty_l2_project = 1
    args.penalty_tie_copy_param = 1
    args.penalty_vocsize = data.vocsize
    #-----------------#
    # LSTM parameters #
    #-----------------#
    # Set the forward LSTM of the first LSTM by hand.
    # Forward LSTM
    if args.bilstm_externalandcopyatmax:
        args.bilstm_stagger_schedule = 'external'
        pass

    if args.bilstm_runbilstmseparately:
        args.bilstm_stagger_schedule = 'external'
        pass

    args.bilstm_do_backward_pass = not (args.use_1l or args.use_4l)
    args.bilstm_forward_do_dropout = 1
    args.bilstm_forward_dropout_retention_freq = 0.8
    args.bilstm_forward_clip_gradient = 1
    args.bilstm_forward_l2_project = 1
    args.bilstm_forward_add_bias = 1
    for prop_src, prop_dest in zip(FORWARD_LSTM_PROPERTIES,
                                   BACKWARD_LSTM_PROPERTIES):
        # Copy Backward LSTM property from the forward part
        args.copy_invariant_is_prefix('bilstm', prop_src, prop_dest)
    #------------------------------------------------------------#
    # Settings for later BiLSTMs : bilstmII, bilstmIII, bilstmIV #
    # These settings are simply copied over.                     #
    # There is no need to remove properties, since properties    #
    # that are not needed would simply not be compiled.          #
    #------------------------------------------------------------#
    for bilstm_height in range(2, len(bilstm_stack) + 1):
        at_top = (bilstm_height == len(bilstm_stack))
        bilstm_height = ARABIC_TO_ROMAN_MAP[bilstm_height]
        if args.bilstm_externalandcopyatmax or args.bilstm_runbilstmseparately:
            if at_top:
                bl_name = ('bilstm%s_forcefully_copy_embedding'
                           '_to_output' % bilstm_height)
                setattr(args, bl_name, 1)
            pass
        if args.bilstm_runbilstmseparately:
            setattr(args, 'bilstm%s_segregate_bilstm_inputs' % bilstm_height,
                    args.bilstm_runbilstmseparately)

        setattr(args, 'bilstm%s_stagger_schedule' % bilstm_height,
                args.bilstm_stagger_schedule)
        setattr(args, 'bilstm%s_do_backward_pass' % bilstm_height,
                args.bilstm_do_backward_pass)
        args = util_add_bilstm_prop(args, 'bilstm%s' % bilstm_height)
    #----------------------------------------------#
    # The clipping Value and Projection Threshold. #
    #----------------------------------------------#
    args.clipping_value = 10
    args.projection_threshold = 7
    #------------------------------------------#
    # Settings for blocking updates to layers. #
    #------------------------------------------#
    args.wemb1_block_update = 0
    args.bilstm_forward_block_update = 0
    args.bilstm_backward_block_update = 0
    args.bilstmII_forward_block_update = 0
    args.bilstmII_backward_block_update = 0
    args.penalty_block_update = 0
    #----------------------------#
    # Learning Rate Controllers. #
    #----------------------------#
    args.decay = 0
    args.decay_epochs = 0
    args.minimum_lr = 1e-5
    # The learning rate decay exponent.
    args.lr_decay_exponent = 0
    #-------------------------#
    # Loading Pretrained PKL. #
    #-------------------------#
    rasengan.warn('NOTE: I have set pretrained_param_pklfile to None')
    args.pretrained_param_pklfile = None
    return args
Example #10
def train_transducer_lbfgs(
        train_lex, train_y, args, ttns, training_stats, batch_size=None):
    ''' This function completes a training epoch by doing one run of LBFGS.
    `ts` abbreviates `train_stack` throughout this function.

    Params
    ------
    train_lex      : A list of input_strings (the strings are represented as np arrays)
    train_y        : A list of output strings
    batch_size     : UNUSED : (default None)
    '''
    assert args.clipping_value < 0
    assert args.projection_threshold < 0

    ts_param_name = [
        str(e) for e in ttns.train_stack_config.updatable_parameters()]
    print('The following params will be trained by lbfgs', ts_param_name)
    ts_param_shape_list = [ttns.train_stack_config[name].get_value().shape
                           for name in ts_param_name]
    ts_param_shape_map = dict(zip(ts_param_name, ts_param_shape_list))

    total_param = sum(numpy.prod(shape)
                      for shape
                      in ts_param_shape_map.values())

    def set_entries_in_ttns(param_vec):
        ''' Set entries in ttns.train_stack_config
        with corresponding values in param_vec.
        '''
        param_vec = param_vec.astype('float32')
        offset = 0
        for name in ts_param_name:
            shape = ts_param_shape_map[name]
            numel = numpy.prod(shape)
            ttns.train_stack_config[name].set_value(
                param_vec[offset:offset + numel].reshape(shape))
            offset += numel
            pass
        return

    def vectorize(param_list, dtype='float32'):
        param_vec = numpy.zeros((total_param,), dtype=dtype)
        offset = 0
        for idx, param in enumerate(param_list):
            shape = param.shape
            assert shape == ts_param_shape_list[idx]
            numel = numpy.prod(shape)
            param_vec[offset:offset + numel] = param.reshape((numel,)).astype(dtype)
            offset += numel
            pass
        return param_vec

    def get_entries_in_ttns():
        ''' Return the current values of the ttns.train_stack_config
        parameters, flattened into a single vector.
        '''
        return vectorize(
            [ttns.train_stack_config[name].get_value()
             for name
             in ts_param_name])

    def loss_over_corpus(param_vec):
        ''' Compute the loss value over the entire corpus.
        '''
        set_entries_in_ttns(param_vec)
        corpus_cost = 0
        for idx in range(len(train_lex)):
            input_string = train_lex[idx]
            output_string = train_y[idx]
            corpus_cost += ttns.train_f_cost(input_string, output_string)
        return corpus_cost / len(train_lex)

    def gradient_over_corpus(param_vec):
        set_entries_in_ttns(param_vec)
        corpus_grad = numpy.zeros((total_param,), dtype='float64')
        for idx in range(len(train_lex)):
            input_string = train_lex[idx]
            output_string = train_y[idx]
            tmp_grad = ttns.train_f_grad(input_string, output_string)
            corpus_grad += vectorize(tmp_grad, 'float64')
        return corpus_grad / len(train_lex)

    with rasengan.tictoc("Training %d epoch" % training_stats['epoch_id']):
        init_param = get_entries_in_ttns()
        rasengan.warn('Skipped FD Check')
        # print 'Check grad output: Error=', scipy.optimize.check_grad(func=loss_over_corpus, grad=gradient_over_corpus, x0=init_param)
        opt_param = scipy.optimize.fmin_l_bfgs_b(
            loss_over_corpus, init_param,
            fprime=gradient_over_corpus, disp=2, maxiter=1000)[0]
        set_entries_in_ttns(opt_param)
    return
Example #11
import util_lstm_seqlabel
import numpy
import time
import rasengan
import lstm_seqlabel_load_save_model
import functools
import codecs
try:
    from dependency_parser.RectangleDependencyParser import DependencyParser
    dp_viterbi_parse = DependencyParser().viterbi_parse
except ImportError:
    rasengan.warn(
        'You do not have the dependency parser. '
        'Do not worry if you just want the transducer')


def get_conlleval_for_task(args):
    if args.task == 'slu':
        from data.atis import conlleval
    elif args.task == 'chunking':
        from data.conll2003_ner import conlleval
    elif args.task == 'ner':
        from data.conll2003_ner import conlleval
    elif args.task == 'postag':
        from data.conll_postag import eval_pwa as conlleval
    else:
        raise NotImplementedError
    return conlleval
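The if/elif chain above is a task-to-evaluator dispatch. A dict-based equivalent, sketched with placeholder evaluator functions (all names here are hypothetical):

# Placeholder evaluators standing in for the real conlleval imports.
def eval_slu(*a): pass
def eval_conll(*a): pass
def eval_pwa(*a): pass

CONLLEVAL_BY_TASK = {'slu': eval_slu, 'chunking': eval_conll,
                     'ner': eval_conll, 'postag': eval_pwa}

def get_conlleval_for_task_sketch(args):
    try:
        return CONLLEVAL_BY_TASK[args.task]
    except KeyError:
        raise NotImplementedError(args.task)

Note that the original keeps the imports inside the branches so that only the selected task's data module is ever loaded; a dict of already-imported functions trades that laziness for brevity.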
Example #12
def config_overide(msg, args):
    assert ' ' not in msg
    args.folder = args.folder + '_' + msg
    rasengan.warn('NOTE: I set args.folder to ' + args.folder)
    yield
    pass
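config_overide yields exactly once, which is the shape contextlib.contextmanager expects; the decorator was presumably just above the first line of the snippet. A self-contained sketch of the same idea, with `rasengan.warn` replaced by print and all names hypothetical:

import contextlib
from types import SimpleNamespace

@contextlib.contextmanager
def config_override_sketch(msg, args):
    assert ' ' not in msg
    args.folder = args.folder + '_' + msg  # tag the output folder
    print('NOTE: I set args.folder to ' + args.folder)
    yield

args = SimpleNamespace(folder='exp')
with config_override_sketch('run1', args):
    print(args.folder)  # -> exp_run1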