Пример #1
0
def train(links):
    from math import exp,fabs,log
    fwords=most_frequent_words()
    classifiers=[PredicateClassifier(HasWordsPredicate([w])) for w in fwords]
    #classifiers.extend(PredicateClassifier(HasWordsPredicate(duo)) for duo in most_frequent_duos(fwords))
    titles=[mash_post(l) for l in links]
    evaluations=[1. if l.evaluation else -1. for l in links]
    weights=[1./len(links) for l in links]
    trained=[]
    print "Training on %d features..." % len(classifiers)
    while True:
        print ".",
        min_error=1e6 ; best=None
        for c in classifiers:
            c.train(titles,weights,evaluations)
            error=sum(weights[n]*0.5*fabs(c.predict(t)-evaluations[n]) for n,t in enumerate(titles))
            if error < min_error:
                best=c; min_error=error
        if min_error>=0.5:
            print min_error
            break
        Zt=sum(weights[n]*exp(-best.predict(t)*evaluations[n]) for n,t in enumerate(titles))
        weights=[weights[n]*exp(-best.predict(t)*evaluations[n])/Zt for n,t in enumerate(titles)]
        alphat=0.5*log((1-min_error)/min_error)
        trained.append((best,alphat))
        classifiers.remove(best)
    for c,alpha in trained:
        print c.predicate,c.wordgood,alpha
    import cPickle
    cPickle.dump(trained,open("adaboost.pck","wb"),-1)
Пример #2
0
def predict(link):
    """Score a link with the trained ensemble.

    Each trained classifier votes on the mashed post text, weighted by its
    alpha; the sign of the weighted sum decides the class.  Returns 1. for a
    non-negative score, -1. otherwise.
    """
    words = mash_post(link)
    score = sum(alpha * c.predict(words) for c, alpha in trained)
    return 1. if score >= 0 else -1.