def assertDumpContains(self, response, url, text, count=None, status_code=200, msg_prefix=''):
    """
    Like assertContains, but dumps the response content to the static
    folder when the assertion fails, so the page can be inspected.
    """
    try:
        self.assertContains(response, text, count, status_code, msg_prefix)
    except AssertionError, e:
        # Build a filesystem-safe filename from the url.
        filename = url.replace("/", "_").replace("&", "-").replace("?", "_").replace("%", '--').replace("#", '').replace("=", '--')
        filename += "_test_error.html"
        # Record which backend rendered the page inside the dump.
        repl = "<!-- Backend: %s -->\n</html>" % (seo_link_settings.BACKEND)
        dump_to_static_folderfile(filename, response.content.replace("</html>", repl))
        raise e
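
# assertDumpContains() and test_url() both call dump_to_static_folderfile(),
# which is defined elsewhere in this project. Below is a minimal sketch of
# what such a helper could look like, assuming dumps go into a directory
# configured via Django's STATIC_ROOT setting; the setting used here is an
# assumption, not the project's actual code:
import os
from django.conf import settings

def dump_to_static_folderfile(filename, content):
    """Write ``content`` to ``filename`` inside the static dump folder (sketch)."""
    path = os.path.join(settings.STATIC_ROOT, filename)
    f = open(path, "w")
    try:
        f.write(content)
    finally:
        f.close()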
def test_url(self, request, query_set):
    dummy = DummyClient()
    test_date = datetime.now()
    for url in query_set:
        self._delete_old_testresults(url)
        # Append the no-processing flag with the proper query separator.
        sep = "&"
        if url.test_url.find("?") == -1:
            sep = "?"
        url_no_processing = "%s%s%s=True" % (url.test_url, sep, NO_PROCESSING_GET_PARAM)
        # Fetch the page once with link injection disabled ...
        response = dummy.client.get(url_no_processing)
        title, link_tuples = self._extract_links_title(response.content)
        # ... and once with it enabled; the set difference is the injected links.
        response2 = dummy.client.get(url.test_url)
        title, link_tuples_with_injected = self._extract_links_title(response2.content)
        injected_links = link_tuples_with_injected - link_tuples
        for href, anchor in link_tuples:
            TestResult.objects.get_or_create(page_url=url,
                                             page_title=title.strip(),
                                             link_url=href,
                                             link_text=anchor.strip(),
                                             is_injected=False,
                                             created_at=test_date)
        for href, anchor in injected_links:
            TestResult.objects.get_or_create(page_url=url,
                                             page_title=title.strip(),
                                             link_url=href,
                                             link_text=anchor.strip(),
                                             is_injected=True,
                                             created_at=test_date)
        # test done, save it
        url.tested_at = test_date
        url.save()
        # Equal link counts mean nothing was injected: dump both versions.
        if DUMP_TEST_URLS_FAILURES_TO_STATIC and len(link_tuples) == len(link_tuples_with_injected):
            name1 = "%s_org.html" % (url.id)
            name2 = "%s_injected.html" % (url.id)
            dump_to_static_folderfile(name1, response.content)
            dump_to_static_folderfile(name2, response2.content)
    msg = "queried %s urls" % (len(query_set))
    request.user.message_set.create(message=msg)
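
# test_url() above relies on _extract_links_title(), defined elsewhere in
# this class. A plausible minimal sketch, assuming it returns the page title
# plus a set of (href, anchor) tuples so that the set difference used above
# works; this body is an assumption, not the project's actual implementation:

def _extract_links_title(self, html):
    """Return (title, set of (href, anchor) tuples) parsed from ``html``."""
    soup = BeautifulSoup(html)
    title = soup.title.string if soup.title and soup.title.string else u""
    links = set()
    for a in soup.findAll("a", href=True):
        links.add((a["href"], a.renderContents()))
    return title, links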
def _replace_nodes_get_html(self, soup2, cur_tag, term):
    self.iteration_count += 1
    soup_backup = soup2
    if self._is_nodes_parent_tree_valid_for_replacement(soup2, cur_tag, term):
        # replace the node with the template and reload the soup
        new_snippet = self._get_template_context_for_term(term)
        new_html = u""
        if isinstance(cur_tag, NavigableString):
            if seo_link_settings.DEBUG:
                log.debug("term %s replace: %s" % (term.words, cur_tag))
            # update the stats
            replacement_count = self.get_replacement_count_for_term(term)
            replacement_count += 1
            self.update_replacement_count_for_term(term, replacement_count)
            if cur_tag.parent is not None:
                # replace the text with the new node structure
                parent_tag = cur_tag.parent
                parent_tag_contents = parent_tag.contents
                cur_tag_index = parent_tag_contents.index(cur_tag)
                new_txt = parent_tag_contents[cur_tag_index].replace(term.words, new_snippet['content'])
                new_node = BeautifulSoup(smart_unicode_encode(new_txt))
                if seo_link_settings.DEBUG:
                    log.debug("parent_tag_content %s" % parent_tag_contents)
                    log.debug("parent_tag_content [%s] : %s" % (cur_tag_index, parent_tag_contents[cur_tag_index]))
                    log.debug("new_txt %s" % (new_txt))
                    log.debug("new_node contents %s" % new_node.contents)
                    log.debug("new_node %s" % new_node)
                # manual replace, honouring REPLACE_ONLY_ONE_TIME_PER_TERM
                do_replace = False
                if not seo_link_settings.REPLACE_ONLY_ONE_TIME_PER_TERM:
                    do_replace = True
                elif seo_link_settings.REPLACE_ONLY_ONE_TIME_PER_TERM and replacement_count < 2:
                    do_replace = True
                if do_replace:
                    if seo_link_settings.DEBUG:
                        log.debug("replacing node with %s" % new_node)
                    soup_backup = soup2
                    # replace it
                    cur_tag.extract()
                    parent_tag.insert(cur_tag_index, new_node)
                    cur_tag = parent_tag.contents[cur_tag_index]
                if seo_link_settings.DEBUG:
                    if cur_tag.parent is None:
                        log.debug("current parent is None")
        else:
            if seo_link_settings.DEBUG:
                log.debug("matched tag class %s" % (cur_tag.__class__))
    out = None
    try:
        # this is dirty, but it is the only way to get the modified html as a new document
        out = u"".join(unicode(soup2))
    except UnicodeDecodeError, e:
        # fall back to the non-breaking backup version
        out = u"".join(unicode(soup_backup))
        log.error(e)
        if seo_link_settings.DEBUG:
            log.error("iteration:%s -tag:%s -term:%s " % (self.iteration_count, cur_tag, term.words))
            log.error(cur_tag)
        if seo_link_settings.DUMP_TEST_URLS_FAILURES_TO_STATIC:
            url = "parse_error_%s" % (term.words)
            filename = url.replace("/", "_").replace("&", "-").replace("?", "_").replace("%", '--').replace("#", '').replace("=", '--').replace(" ", '-')
            filename += "_simple.html"
            dump_to_static_folderfile(filename, unicode(soup2))
    return out
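
# The chained .replace() calls above duplicate the filename sanitizing in
# assertDumpContains(). One way to factor the logic into a shared helper;
# this is a hypothetical refactoring, not part of the original module:

def _sanitize_dump_filename(url, suffix):
    """Turn a url or search term into a filesystem-safe dump filename."""
    replacements = (("/", "_"), ("&", "-"), ("?", "_"), ("%", "--"),
                    ("#", ""), ("=", "--"), (" ", "-"))
    for old, new in replacements:
        url = url.replace(old, new)
    return url + suffix

# usage sketch:
#   filename = _sanitize_dump_filename("parse_error_%s" % term.words, "_simple.html")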