def scrape_report(url):
    """Scrape the site-age sentence and SEO stat tables for *url*.

    Returns a dict with the extracted ``age`` phrase and the raw HTML of the
    two SEO tables joined for display.
    """
    page = get_html(URL.format(strip_url(url)))
    soup = BeautifulSoup(page, "lxml")

    # The overview sentence reads like "... is <age>." — pull the age phrase
    # out from between "is" and the closing period.
    overview_text = soup.select_one(".stat_half_right .stat_overview").text
    site_age = overview_text.split("is")[1].split(".")[0].strip()

    # The third and fourth detail tables carry the SEO stats; join their HTML.
    tables = soup.select(
        ".stat_details .row-fluid.marginBottom_10 .span9 table")
    seo_html = "{} <br> {}".format(str(tables[2]), str(tables[3]))

    return {"age": site_age, "seo_info": seo_html}
def scrape_report(url):
    """Scrape the "similar websites" overlap table for *url*.

    Returns a dict whose ``similar_websites`` value is the table's HTML,
    rewritten so each row links to our own /lookup view instead of the
    remote site's /siteinfo/ pages.
    """
    # Fix: removed two leftover debug print() calls that polluted stdout.
    html = get_html(URL.format(strip_url(url)))
    soup = BeautifulSoup(html, "lxml")

    table = soup.select_one("#audience_overlap_table")
    # Rewrite the outbound links to point at our lookup view, and strip the
    # "&nbsp" artifacts present in the source markup.
    # NOTE(review): the entity is replaced WITHOUT its trailing ";" — if the
    # raw markup really contains "&nbsp;", a stray ";" is left behind.
    # Confirm against the live markup before "fixing" further.
    similar_website = (
        str(table)
        .replace("/siteinfo/", "/lookup?site=")
        .replace("&nbsp", "")
    )

    return {"similar_websites": similar_website}
def lookup(request, url):
    """Render the lookup page for *url*, scraping it on first visit.

    Sites already analysed are served from the database; unknown sites are
    scraped, shown, and persisted for next time.
    """
    context = {"recent_site": recent_analysed()}

    existing = Website.objects.filter(url=strip_url(url))
    if existing.exists():
        context["data"] = existing.first()
    else:
        report = scrape_site_report(url)
        context["data"] = report
        create_website(report)

    return render(request, "lookup.html", context)
def scrape_report(url):
    """Scrape worth/traffic statistics for *url*.

    Returns a stats dict on success, or a ``{"success": False, "code": 404}``
    dict when the source site shows its error page for the domain.
    """
    page = get_html(URL.format(strip_url(url)))
    soup = BeautifulSoup(page, "lxml")
    wrapper = soup.select_one("#content > div > div.wrapper")

    heading = wrapper.select_one(".left h1#page-title")
    if heading.text == "An Error Occurred":
        # The upstream site renders an error page for unknown domains.
        return {"success": False, "code": 404, "site": url}

    # The first paragraph's <b> tags hold the figures in a fixed order.
    bold = wrapper.select_one(".left p").select("b")
    return {
        "website": bold[0].text,
        "url": bold[0].text,
        "worth": bold[1].text,
        "daily_unique_users": extract_number(bold[2].text),
        "daily_page_views": extract_number(bold[3].text),
        "daily_revenue": bold[4].text,
        "alexa_rank": bold[5].text,
        "success": True,
        "code": 200,
        "name": get_name(url),
    }
def scrape_report(url=None):
    """Scrape the basic/website/DNS/IP/whois report sections for *url*.

    Returns a dict mapping section names to the raw HTML of each section.

    Raises:
        ValueError: if *url* is not supplied.  (Fix: the original guarded
        only the fetch with ``if url:``, so calling it without a url fell
        through to a confusing NameError on the never-assigned ``html``.)
    """
    if not url:
        raise ValueError("scrape_report() requires a url to fetch")

    html = get_html(URL.format(strip_url(url)))
    soup = BeautifulSoup(html, "lxml")

    # Each report section lives under its own id on the page.
    basic = soup.select_one("#basic table")
    website = soup.select_one("#website .dl-horizontal")
    dns = soup.select_one("#dns table")
    ip_info = soup.select_one("#geo .dl-horizontal")
    whois = soup.select_one("#whois pre")

    return {
        "website_info": str(website),
        "basic_info": str(basic),
        "dns_info": str(dns),
        "ip_info": str(ip_info),
        "whois_info": str(whois),
    }
def loookup(request):
    # NOTE(review): name has a typo ("loookup") but is kept — renaming would
    # break whatever URL conf references it; fix there first.
    """Redirect a ``?site=<url>`` query to the canonical /lookup/<url> page."""
    target = strip_url(request.GET.get("site"))
    return redirect("/lookup/{}".format(target))