def urlcheck_fork(url):
    """Start a background thread to find matches for ``url``.

    Nothing is started when a ``MatchPage`` with this URL and URL hash
    already exists.  Otherwise a ``UrlCheckThread`` is created, handed the
    URL through its ``url`` attribute, and started.
    """
    try:
        # Only the existence of the row matters; the object itself is unused.
        MatchPage.objects.get(url=url, url_hash=url_hash(url))
    except MatchPage.DoesNotExist:
        worker = UrlCheckThread()
        worker.url = url
        worker.start()
def urlcheck_real(url):
    """Compute matches for a URL and store them in the database.

    The ``MatchPage`` for ``url`` is fetched or created (new rows start with
    ``loading=True``).  Raw disputes from ``basematcher`` are filtered through
    ``f.is_good`` and ``t.is_bad``, de-duplicated, scored by the SVM
    classifier, and saved as ``SimpleMatch`` rows attached to the page.

    A dispute whose classification fails is skipped instead of being stored
    with a missing or stale score.  The page's ``loading`` flag is cleared even
    when processing aborts with an exception, so pollers do not wait on a page
    that will never finish; the exception still propagates.

    Returns the page's ``simplematch_set`` queryset.
    """
    from urlcheck.api import get_dispute_context

    page, _created = MatchPage.objects.get_or_create(
        url=url,
        defaults={'url_hash': url_hash(url), 'loading': True})
    try:
        print("get_raw_disputes: %s" % (url,))
        candidates = [d for d in basematcher.get_raw_disputes(url)
                      if f.is_good(d)]
        candidates = [d for d in candidates
                      if not t.is_bad(to_unicode(d[1]))]

        for dispute in remove_duplicates(candidates):
            claim = dispute[1]
            match_context = "".join(dispute[2])
            source = get_dispute_context(claim)
            svmitem = {
                'claimtext': claim,
                'matchurl': url,
                'srcurl': source['url'],
                'srccontext': source['text'],
                'matchcontext': match_context,
            }

            print("about to compute score with libsvm")
            # The classifier is guarded by a shared lock; release it on every
            # path, including failures.
            svmlock.acquire()
            try:
                # NOTE(review): `range` is whatever the module binds under
                # that name (falls back to the builtin if nothing does).
                score = features.classify_item(svmitem, model, range, mapping)
            except Exception:
                print("exception in classifier")
                # No usable score for this dispute: do not save it.
                continue
            finally:
                svmlock.release()
            print("got score")

            match = SimpleMatch(page=page,
                                claimtext=to_unicode(claim),
                                score=score,
                                matchcontext=to_unicode(match_context))
            try:
                match.save()
            except Exception:
                # Best effort: one failed row must not abort the rest.
                print("exception saving dispute")
    finally:
        page.loading = False
        page.save()
    return page.simplematch_set.all()
def urlcheck_fork(url):
    """Start a background thread to find matches for ``url``.

    Nothing is started when a ``MatchPage`` with this URL and URL hash
    already exists.  Otherwise a ``UrlCheckThread`` is created, handed the
    URL through its ``url`` attribute, and started.
    """
    try:
        # Only the existence of the row matters; the object itself is unused.
        MatchPage.objects.get(url=url, url_hash=url_hash(url))
    except MatchPage.DoesNotExist:
        worker = UrlCheckThread()
        worker.url = url
        worker.start()
def urlcheck_get(url, count=0):
    """Return the matches for ``url`` from the database, computing if absent.

    While the stored page's ``loading`` flag is set, the lookup is retried
    after a 0.25 second sleep.  ``count`` is the number of retries already
    made; once it reaches 200 the wait is abandoned and whatever matches are
    stored are returned.  When no ``MatchPage`` exists for the URL, the
    matches are computed synchronously via ``urlcheck_real``.
    """
    try:
        page = MatchPage.objects.get(url=url, url_hash=url_hash(url))
        keep_waiting = page.loading and count < 200
        if not keep_waiting:
            return page.simplematch_set.all()
        sleep(0.25)
        # The retry deliberately stays inside the try block, as before.
        return urlcheck_get(url, count + 1)
    except MatchPage.DoesNotExist:
        return urlcheck_real(url)
def urlcheck_get(url, count=0):
    """Return the matches for ``url`` from the database, computing if absent.

    While the stored page's ``loading`` flag is set, the lookup is retried
    after a 0.25 second sleep.  ``count`` is the number of retries already
    made; once it reaches 200 the wait is abandoned and whatever matches are
    stored are returned.  When no ``MatchPage`` exists for the URL, the
    matches are computed synchronously via ``urlcheck_real``.
    """
    try:
        page = MatchPage.objects.get(url=url, url_hash=url_hash(url))
        keep_waiting = page.loading and count < 200
        if not keep_waiting:
            return page.simplematch_set.all()
        sleep(0.25)
        # The retry deliberately stays inside the try block, as before.
        return urlcheck_get(url, count + 1)
    except MatchPage.DoesNotExist:
        return urlcheck_real(url)
def urlcheck_real(url):
    """Compute matches for a URL and store them in the database.

    The ``MatchPage`` for ``url`` is fetched or created (new rows start with
    ``loading=True``).  Raw disputes from ``basematcher`` are filtered through
    ``f.is_good`` and ``t.is_bad``, de-duplicated, scored by the SVM
    classifier, and saved as ``SimpleMatch`` rows attached to the page.

    A dispute whose classification fails is skipped instead of being stored
    with a missing or stale score.  The page's ``loading`` flag is cleared even
    when processing aborts with an exception, so pollers do not wait on a page
    that will never finish; the exception still propagates.

    Returns the page's ``simplematch_set`` queryset.
    """
    from urlcheck.api import get_dispute_context

    page, _created = MatchPage.objects.get_or_create(
        url=url,
        defaults={'url_hash': url_hash(url), 'loading': True})
    try:
        print("get_raw_disputes: %s" % (url,))
        candidates = [d for d in basematcher.get_raw_disputes(url)
                      if f.is_good(d)]
        candidates = [d for d in candidates
                      if not t.is_bad(to_unicode(d[1]))]

        for dispute in remove_duplicates(candidates):
            claim = dispute[1]
            match_context = "".join(dispute[2])
            source = get_dispute_context(claim)
            svmitem = {
                'claimtext': claim,
                'matchurl': url,
                'srcurl': source['url'],
                'srccontext': source['text'],
                'matchcontext': match_context,
            }

            print("about to compute score with libsvm")
            # The classifier is guarded by a shared lock; release it on every
            # path, including failures.
            svmlock.acquire()
            try:
                # NOTE(review): `range` is whatever the module binds under
                # that name (falls back to the builtin if nothing does).
                score = features.classify_item(svmitem, model, range, mapping)
            except Exception:
                print("exception in classifier")
                # No usable score for this dispute: do not save it.
                continue
            finally:
                svmlock.release()
            print("got score")

            match = SimpleMatch(page=page,
                                claimtext=to_unicode(claim),
                                score=score,
                                matchcontext=to_unicode(match_context))
            try:
                match.save()
            except Exception:
                # Best effort: one failed row must not abort the rest.
                print("exception saving dispute")
    finally:
        page.loading = False
        page.save()
    return page.simplematch_set.all()