Example #1

def perceptron_update(x, y, weights, labels):
    '''
    compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    '''
    updated_weights = defaultdict(float)
    y_pred, _ = predict(x, weights, labels)
    fxy = make_feature_vector(x, y)
    fxy_pred = make_feature_vector(x, y_pred)

    # keys differ between the two feature vectors only when y != y_pred,
    # so the symmetric difference contains every feature to adjust
    mismatched_keys = set(fxy.keys()).symmetric_difference(set(fxy_pred.keys()))
    for key in mismatched_keys:
        if key in fxy:
            updated_weights[key] = fxy[key]
        else:
            updated_weights[key] = -fxy_pred[key]

    return updated_weights
Example #2
def perceptron_update(x, y, weights, labels):
    """
    compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """

    # update = f(x, y) - f(x, y_predicted)
    y_predicted, _ = predict(x, weights, labels)

    update = defaultdict(float)

    f_predicted = make_feature_vector(x, y_predicted)
    f_real = make_feature_vector(x, y)

    # union of the keys of both feature vectors
    features = set(f_predicted.keys()).union(f_real.keys())

    for feature in features:
        # use .get so keys present in only one vector default to 0
        value = f_real.get(feature, 0) - f_predicted.get(feature, 0)
        if value != 0:
            update[feature] = value

    return update
Example #3
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """
    y_hat, scores = predict(x, weights, labels)
    f_x_y = make_feature_vector(x, y)
    f_x_y_hat = make_feature_vector(x, y_hat)

    update = defaultdict(float)

    # features present only in the true feature vector get a positive update
    diff_keys = set(f_x_y.keys()) - set(f_x_y_hat.keys())
    for key in diff_keys:
        update[key] = f_x_y[key]

    # features present only in the predicted feature vector get a negative update
    diff_keys = set(f_x_y_hat.keys()) - set(f_x_y.keys())
    for key in diff_keys:
        update[key] = -f_x_y_hat[key]

    return update
Example #4
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance
    """
    update = defaultdict(float)
    predicted_label, scores = predict(x, weights, labels)
    if predicted_label != y:
        update.update(make_feature_vector(x, y))
        temp = make_feature_vector(x, predicted_label)
        # do not reuse x and y as loop variables here: that would clobber
        # the function arguments (iteritems() is also Python 2 only)
        for feat, value in temp.items():
            temp[feat] = -value
        update.update(temp)
    return update
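In use, a perceptron_update implementation like the ones above sits inside an outer loop that folds each returned update into the weight vector. A hypothetical sketch of that driver follows; the name estimate_perceptron and its signature are assumptions, not part of the examples.

from collections import defaultdict

def estimate_perceptron(x_train, y_train, n_its):
    # run n_its perceptron passes over the training data
    weights = defaultdict(float)
    labels = set(y_train)
    for _ in range(n_its):
        for x_i, y_i in zip(x_train, y_train):
            # the update is empty when x_i is already classified
            # correctly, so this is a no-op in that case
            for feature, delta in perceptron_update(x_i, y_i, weights, labels).items():
                weights[feature] += delta
    return weights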
Example #5
def test_d2_1_featvec():
    label = '1980s'
    fv = clf_base.make_feature_vector({'test':1,'case':2},label)
    eq_(len(fv),3)
    eq_(fv[(label,'test')],1)
    eq_(fv[(label,'case')],2)
    eq_(fv[(label,constants.OFFSET)],1)
Example #7
def test_clf_base_d2_1():
    # public
    label = 'iama'
    fv = clf_base.make_feature_vector({'test':1,'case':2},label)
    eq_(len(fv),3)
    eq_(fv[(label,'test')],1)
    eq_(fv[(label,'case')],2)
    eq_(fv[(label,constants.OFFSET)],1)
def test_clf_base_d2_1():
    # public
    label = 'iama'
    fv = clf_base.make_feature_vector({'test':1,'case':2},label)
    eq_(len(fv),3)
    eq_(fv[(label,'test')],1)
    eq_(fv[(label,'case')],2)
    eq_(fv[(label,constants.OFFSET)],1)
Example #9
def perceptron_update(x,y,weights,labels):
    """compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """
    updates = defaultdict(float)
    y_hat, _ = predict(x, weights, labels)
    if y_hat != y:
        feature_vector = make_feature_vector(x, y)
        y_hat_feature_vector = make_feature_vector(x, y_hat)
        for feature in feature_vector:
            updates[feature] = feature_vector[feature]
        for feature in y_hat_feature_vector:
            updates[feature] = -y_hat_feature_vector[feature]
    return updates
Example #10
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string 
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels 
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """
    y_hat, _ = predict(x, weights, labels)
    fv = make_feature_vector(x, y)
    fv_hat = make_feature_vector(x, y_hat)
    new_theta = defaultdict(float)
    if y_hat != y:
        # add the true feature vector and subtract the predicted one
        for f in fv:
            new_theta[f] += fv[f]
        for f in fv_hat:
            new_theta[f] -= fv_hat[f]
    return new_theta
Example #11
def estimate_logreg(x,
                    y,
                    N_its,
                    learning_rate=1e-4,
                    regularizer=1e-2,
                    lazy_reg=True):
    """estimate a logistic regression classifier

    :param x: training instances
    :param y: training labels
    :param N_its: number of training iterations
    :param learning_rate: how far to move on the gradient for each instance
    :param regularizer: how much L2 regularization to apply at each update
    :param lazy_reg: whether to do lazy regularization or not
    :returns: dict of feature weights, list of feature weights at each training epoch
    :rtype: dict, list

    """
    weights = defaultdict(float)
    weight_hist = []  #keep a history of the weights after each iteration
    all_labels = set(y)

    # this block is for lazy regularization
    ratereg = learning_rate * regularizer

    def regularize(base_feats):
        # apply the regularization penalty accrued since each base
        # feature was last updated, then mark it as updated now
        for base_feat in base_feats:
            for label in all_labels:
                weights[(label, base_feat)] *= (1. - ratereg) ** (t - last_update[base_feat])
            last_update[base_feat] = t

    t = 0
    last_update = defaultdict(int)

    eta = learning_rate

    for it in range(N_its):

        for i, (x_i, y_i) in enumerate(zip(x, y)):
            t += 1

            # regularization
            if lazy_reg:  # lazy regularization is essential for speed
                regularize(x_i)  # only regularize features in this instance
            if not lazy_reg:  # for testing/explanatory purposes only
                for feat, weight in weights.items():
                    if feat[1] != OFFSET:  # usually don't regularize the offset
                        weights[feat] -= ratereg * weight

            p_y = compute_py(x_i, weights, all_labels)  # p(y | x_i) under current weights

            # gradient step: add f(x_i, y_i), then subtract the expected
            # feature vector under the current model distribution p(y | x_i)
            term2 = make_feature_vector(x_i, y_i)
            for key in term2.keys():
                weights[key] += term2[key] * eta

            for label in all_labels:
                temp = make_feature_vector(x_i, label)
                for key in temp.keys():
                    weights[key] -= temp[key] * eta * p_y[label]

        print(it, end=' ')
        weight_hist.append(weights.copy())

    # if lazy, let regularizer catch up
    if lazy_reg:
        # iterate over base features
        regularize(list(set(f[1] for f in weights.keys() if f[1] != OFFSET)))

    return weights, weight_hist
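estimate_logreg leans on one more helper that is not shown here, compute_py. A minimal sketch under the assumption that it returns the conditional distribution p(y | x) as a dict over labels, computed as a numerically stabilized softmax of the per-label linear scores:

import math

def compute_py(x, weights, labels):
    # linear score for each candidate label
    scores = {}
    for label in labels:
        fv = make_feature_vector(x, label)
        scores[label] = sum(weights.get(f, 0.0) * v for f, v in fv.items())
    # softmax, shifted by the max score to avoid overflow in exp()
    max_score = max(scores.values())
    exp_scores = {label: math.exp(s - max_score) for label, s in scores.items()}
    total = sum(exp_scores.values())
    return {label: s / total for label, s in exp_scores.items()}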