def data_for_dispute(dispute, url):
    """Build the display/JSON payload for one dispute.

    Combines the stored dispute object with the crowd-vote context fetched
    for its claim text, and computes a vote-adjusted score.

    dispute -- a SimpleMatch-like object (claimtext, matchcontext, id,
               score, vote attributes); url -- the page the match was found on.
    Returns a plain dict of display fields.
    """
    sourcecontext = get_dispute_context(dispute.claimtext)
    # Penalize the classifier score by the bad/good vote ratio.
    # float() guards against Python 2 integer floor division, which would
    # truncate the ratio to 0 (or -1) and make the votes nearly meaningless.
    score = dispute.score - float(sourcecontext['badvotes']) / (1 + sourcecontext['goodvotes'])
    if dispute.vote == "good":
        # An explicit "good" vote overrides the computed score entirely.
        score = 1
    return {
        'badvotes': sourcecontext['badvotes'],
        'goodvotes': sourcecontext['goodvotes'],
        'claimtext': dispute.claimtext,
        'matchcontext': dispute.matchcontext,
        'id': dispute.id,
        'score': score,
        'bad': t.is_bad(dispute.claimtext),
        'vote': dispute.vote,
        'pageurl': url,
        'sourceurl': sourcecontext['url'],
        'sourcedomain': get_domain(sourcecontext['url']),
        # Highlight the claim inside its source context for HTML display.
        'sourcecontext': sourcecontext['text'].replace(
            dispute.claimtext, "<b>" + dispute.claimtext + "</b>"),
        'sourceprefix': sourcecontext['prefix'],
        'displaycontext': make_bold_text(dispute.claimtext, dispute.matchcontext),
    }
def urlcheck_real(url): """Compute matches for a URL and store them in the database.""" from urlcheck.api import get_dispute_context urlobj,created = MatchPage.objects.get_or_create(url=url, defaults={'url_hash':url_hash(url),'loading':True}) # disputes = s.get_raw_disputes(url) print "get_raw_disputes:",url disputes = [d for d in basematcher.get_raw_disputes(url) if f.is_good(d)] disputes = [d for d in disputes if not t.is_bad(to_unicode(d[1]))] for dispute in remove_duplicates(disputes): sourcecontext = get_dispute_context(dispute[1]) svmitem = {'claimtext':dispute[1], 'matchurl':url,'srcurl':sourcecontext['url'], 'srccontext':sourcecontext['text'], 'matchcontext':"".join(dispute[2])} print "about to compute score with libsvm" svmlock.acquire() try: score = features.classify_item(svmitem,model,range,mapping) except: print "exception in classifier" svmlock.release() print "got score" disputeobj = SimpleMatch(page=urlobj, claimtext=to_unicode(dispute[1]), score = score, matchcontext=to_unicode("".join(dispute[2]))) try: disputeobj.save() except: print "exception saving dispute" urlobj.loading = False urlobj.save() return urlobj.simplematch_set.all()
def urlcheck_real(url): """Compute matches for a URL and store them in the database.""" from urlcheck.api import get_dispute_context urlobj, created = MatchPage.objects.get_or_create(url=url, defaults={ 'url_hash': url_hash(url), 'loading': True }) # disputes = s.get_raw_disputes(url) print "get_raw_disputes:", url disputes = [d for d in basematcher.get_raw_disputes(url) if f.is_good(d)] disputes = [d for d in disputes if not t.is_bad(to_unicode(d[1]))] for dispute in remove_duplicates(disputes): sourcecontext = get_dispute_context(dispute[1]) svmitem = { 'claimtext': dispute[1], 'matchurl': url, 'srcurl': sourcecontext['url'], 'srccontext': sourcecontext['text'], 'matchcontext': "".join(dispute[2]) } print "about to compute score with libsvm" svmlock.acquire() try: score = features.classify_item(svmitem, model, range, mapping) except: print "exception in classifier" svmlock.release() print "got score" disputeobj = SimpleMatch(page=urlobj, claimtext=to_unicode(dispute[1]), score=score, matchcontext=to_unicode("".join(dispute[2]))) try: disputeobj.save() except: print "exception saving dispute" urlobj.loading = False urlobj.save() return urlobj.simplematch_set.all()