def compare_revisions(self, old_id, new_id):
    """Return an HTML diff between two stored revisions.

    ``old_id`` and ``new_id`` are looked up as ``Version`` ids and the
    ``content_html`` of both rows is passed to
    ``htmldiff.render_html_diff``.  When either id is ``None`` no lookup
    happens and an empty string is returned.
    """
    both_given = old_id is not None and new_id is not None
    if not both_given:
        return ''

    # Imported only after the guard, so the early-return path never needs
    # htmldiff.
    from htmldiff import render_html_diff

    before_html = Version.objects.get(id=old_id).content_html
    after_html = Version.objects.get(id=new_id).content_html
    return render_html_diff(before_html, after_html)
def distance():
    """Render a debug page comparing one article with related articles.

    Query-string parameters (read from ``request.args``):
        data: text that is evaluated into a mapping.  The code reads its
            ``'_id'`` and ``'num'`` entries and its ``'ids'`` mapping, whose
            keys are used as ``_id`` values in the lookup query and whose
            values are compared against ``data['num']``.
        title1, title2: the two titles whose hashes are compared.

    Every distance is stored under a fixed key of the result dict, so when
    ``data['ids']`` has several entries only the values computed in the last
    loop iteration reach the template.

    Returns:
        The rendered ``test/detail.html`` template, given the result dict as
        ``data``.

    Raises:
        KeyError: if the lookup over ``data['ids']`` yields no document,
            because ``res['content2']`` is only set inside that loop.
        TypeError: if ``find_one`` yields ``None`` for ``data['_id']`` (the
            result is subscripted without a check).
    """
    raw_data = request.args.get('data', '')
    title1 = request.args.get('title1', '')
    title2 = request.args.get('title2', '')

    # SECURITY: eval() on a request parameter executes arbitrary code that
    # the client supplies.  Left in place so existing callers keep working;
    # replace with a safe parser (e.g. ast.literal_eval or JSON) once the
    # format the callers send has been confirmed.
    data = eval(raw_data)
    # Debug output; the call form prints the same text on Python 2 and also
    # parses on Python 3.
    print(data)

    res = {'title1': title1, 'title2': title2}

    # The same key is overwritten on each pass: only the last value wins.
    for num in data['ids'].values():
        res[u'原差距'] = mongo_spider.sim.distance(data['num'], num)

    # NOTE: sys.maxint exists on Python 2 only.
    title_num1 = Simhash(title1).value - sys.maxint
    title_num2 = Simhash(title2).value - sys.maxint
    res[u'标题差距'] = mongo_spider.sim.distance(title_num1, title_num2)

    article = article_mongo.find_one(
        {'_id': data['_id']},
        {'content': 1, 'title': 1},
    )
    content_num1 = Simhash(article['content']).value - sys.maxint
    text_num = Simhash(
        article['title'] + default + html2text(article['content'])
    ).value - sys.maxint
    res['content1'] = article['content']

    cursors = article_mongo.find(
        {"_id": {"$in": data['ids'].keys()}},
        {'content': 1, 'title': 1},
    )
    for cursor in cursors:
        content_num2 = Simhash(cursor['content']).value - sys.maxint
        text_num2 = Simhash(
            cursor['title'] + default + html2text(cursor['content'])
        ).value - sys.maxint
        res[u'正文差距'] = mongo_spider.sim.distance(content_num1, content_num2)
        res[u'新差距'] = mongo_spider.sim.distance(text_num2, text_num)
        res['content2'] = cursor['content']

    # Diff the reference article against the last document seen above.
    res['d'] = render_html_diff(res['content1'], res['content2'])
    return render_template('test/detail.html', data=res)
def _generate_diff_html(self):
    """Render the diff between ``self.old`` and ``self.new`` to a file.

    The page is built from the ``diff.html`` template located next to this
    module and written to ``self.html_path`` as UTF-8.

    Returns:
        ``None`` when ``self.html_path`` already exists (nothing is done),
        ``False`` when the rendered diff contains neither ``<ins>`` nor
        ``<del>`` (no file is written), ``True`` after the file is written.
    """
    if os.path.isfile(self.html_path):
        return None

    tmpl_path = os.path.join(os.path.dirname(__file__), "diff.html")
    logging.debug("creating html diff: %s", self.html_path)

    diff = htmldiff.render_html_diff(self.old.html, self.new.html)
    if '<ins>' not in diff and '<del>' not in diff:
        return False

    # Use context managers so both file handles are closed (and the output
    # flushed) deterministically instead of whenever the objects happen to
    # be garbage-collected.
    with codecs.open(tmpl_path, "r", "utf8") as tmpl_file:
        tmpl = jinja2.Template(tmpl_file.read())

    html = tmpl.render(
        title=self.new.title,
        url=self.old.entry.url,
        old_url=self.old.archive_url,
        old_time=self.old.created,
        new_url=self.new.archive_url,
        new_time=self.new.created,
        diff=diff,
    )

    with codecs.open(self.html_path, "w", 'utf8') as out_file:
        out_file.write(html)
    return True
def generate_diff_html(self, path):
    """Build the HTML diff page for ``self.old`` versus ``self.new``.

    ``path`` is only checked for existence and used in log messages; this
    method returns the rendered page and does not write it.

    Args:
        path: location of the diff file; an existing file there aborts the
            rendering.

    Returns:
        The rendered HTML, or ``None`` when ``path`` already exists, the
        ``diff_template.html`` template next to this module is missing, or
        ``self.validate_diff`` rejects the diff.
    """
    if os.path.isfile(path):
        logging.error("Diff file already exists: %s", path)
        return None

    tmpl_path = os.path.join(os.path.dirname(__file__), "diff_template.html")
    if not os.path.isfile(tmpl_path):
        logging.error("Failed to find diff template: %s", tmpl_path)
        return None

    logging.debug("creating html diff: %s", path)
    diff = htmldiff.render_html_diff(self.old.html, self.new.html)
    if not self.validate_diff(diff):
        return None

    # Close the template handle deterministically instead of leaking it
    # until garbage collection.
    with codecs.open(tmpl_path, "r", "utf8") as tmpl_file:
        tmpl = jinja2.Template(tmpl_file.read())

    return tmpl.render(
        title=self.new.title,
        url=self.old.entry.url,
        old_time=self.old.created,
        new_time=self.new.created,
        diff=diff,
    )
def view_revisions(request, cat, slug):
    """Show the revisions of a news item, optionally with a diff of two.

    Args:
        request: the incoming request; the optional ``old`` and ``new`` GET
            parameters are ``Version`` ids to diff against each other.
        cat: unused by this view.
        slug: slug of the ``News`` item whose revisions are listed.

    Returns:
        The rendered ``view_post_revisions.html`` response.  ``diff`` in the
        context is an empty string unless both ``old`` and ``new`` are given.

    A 404 is raised when the news item, or either requested revision, does
    not exist.
    """
    news = get_object_or_404(News, slug=slug)
    revisions = (
        Version.objects.select_related()
        .filter(news=news)
        .order_by('-created_at')
    )
    nb_revisions = revisions.count()

    old_id = request.GET.get('old', None)
    new_id = request.GET.get('new', None)

    diff = ''
    if old_id is not None and new_id is not None:
        from htmldiff import render_html_diff

        # The ids come straight from the query string: answer an unknown id
        # with a 404 rather than an unhandled DoesNotExist (HTTP 500).
        old_rev = get_object_or_404(Version, id=old_id).content_html
        new_rev = get_object_or_404(Version, id=new_id).content_html
        diff = render_html_diff(old_rev, new_rev)

    return render(request, 'view_post_revisions.html', {
        'post': news,
        'post_revisions': revisions,
        'nb_revisions': nb_revisions,
        'diff': diff,
    })
def _signed_simhash(text):
    """Return ``Simhash(text).value`` shifted down by ``sys.maxint``."""
    # NOTE(review): the shift presumably moves the hash into a signed
    # integer range -- confirm.  sys.maxint exists on Python 2 only.
    return Simhash(text).value - sys.maxint


def distance():
    """Render a debug page of hash distances between related articles.

    Reads three query-string parameters from ``request.args``:

    * ``data`` -- text evaluated into a mapping with ``'_id'``, ``'num'``
      and ``'ids'`` entries.  The keys of ``'ids'`` are used as ``_id``
      values for the lookup query; its values are compared against
      ``data['num']``.
    * ``title1`` / ``title2`` -- titles whose hashes are compared.

    Each distance is written to a fixed key of the result dict, so with
    several entries in ``data['ids']`` only the last iteration's values are
    passed to the template.

    Returns:
        The rendered ``test/detail.html`` template, with the result dict
        passed as ``data``.

    Raises:
        KeyError: if the query over ``data['ids']`` yields no document
            (``res['content2']`` is then never set).
        TypeError: if ``find_one`` yields ``None`` for ``data['_id']``.
    """
    args = request.args
    raw_data = args.get('data', '')
    title1 = args.get('title1', '')
    title2 = args.get('title2', '')

    # SECURITY: evaluating a request parameter runs client-supplied code.
    # Kept so current callers are not broken; switch to a safe parser
    # (ast.literal_eval / JSON) after confirming what callers send.
    data = eval(raw_data)
    # Debug output, written so it behaves the same on Python 2 and parses
    # on Python 3.
    print(data)

    res = {'title1': title1, 'title2': title2}

    # Overwrites one key per pass; the final value is the one that is kept.
    for num in data['ids'].values():
        res[u'原差距'] = mongo_spider.sim.distance(data['num'], num)

    res[u'标题差距'] = mongo_spider.sim.distance(
        _signed_simhash(title1), _signed_simhash(title2)
    )

    article = article_mongo.find_one(
        {'_id': data['_id']},
        {'content': 1, 'title': 1},
    )
    content_num1 = _signed_simhash(article['content'])
    text_num = _signed_simhash(
        article['title'] + default + html2text(article['content'])
    )
    res['content1'] = article['content']

    cursors = article_mongo.find(
        {"_id": {"$in": data['ids'].keys()}},
        {'content': 1, 'title': 1},
    )
    for cursor in cursors:
        content_num2 = _signed_simhash(cursor['content'])
        text_num2 = _signed_simhash(
            cursor['title'] + default + html2text(cursor['content'])
        )
        res[u'正文差距'] = mongo_spider.sim.distance(content_num1, content_num2)
        res[u'新差距'] = mongo_spider.sim.distance(text_num2, text_num)
        res['content2'] = cursor['content']

    # The diff uses the content of the last document iterated above.
    res['d'] = render_html_diff(res['content1'], res['content2'])
    return render_template('test/detail.html', data=res)