# Tally web-server software listed in "result.txt" and draw the tallies as a
# bar chart.
from hist import Histogram
import matplotlib.pyplot as plt

# Lower-case marker searched for in each line -> label recorded in the histogram.
servers = {
    "apache": "Apache",
    "nginx": "nginx",
    "iis": "IIS",
    "lighttpd": "lighttpd",
}

h = Histogram()

with open("result.txt", "r") as f:
    lines = f.read().split("\n")

for entry in lines:
    lowered = entry.lower()
    for marker, label in servers.items():
        if marker not in lowered:
            continue
        # The field after the first ":" is parsed as the number of times the
        # label is added to the histogram.
        occurrences = int(entry.split(":")[1])
        for _ in range(occurrences):
            h.add(label)

# From here on ``h`` is whatever ``get_dict()`` returned, not the Histogram.
h = h.get_dict()
print(h)

keys = list(h.keys())
values = list(h.values())
X = list(range(len(keys)))

plt.bar(X, values, align="center")
def has_tld(url, tld): return domain_from_url(url).endswith(tld) def get_html(url): return requests.get(url).text REGISTER = "http://register.start.bg" HEADERS = { "User-Agent": "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0" } visited = set() h = Histogram() links = [link.get("href") for link in BeautifulSoup(get_html(REGISTER)).find_all("a")] for link in links: if link is not None and "link.php" in link: try: target_url = REGISTER + "/" + link r = requests.head(target_url, headers=HEADERS, allow_redirects=True, timeout=10) target_url = domain_from_url(r.url) if target_url not in visited: visited.add(target_url) if has_tld(target_url, ".bg"):
# Read per-server counts from "result.txt", accumulate them in a Histogram and
# prepare the key / value / x-position lists for plotting.
from hist import Histogram
import matplotlib.pyplot as plt

# Maps the lower-case substring looked for in a line to the label that is
# added to the histogram when the substring is found.
servers = {
    "apache": "Apache",
    "nginx": "nginx",
    "iis": "IIS",
    "lighttpd": "lighttpd",
}

h = Histogram()

with open("result.txt", "r") as f:
    lines = f.read().split("\n")

for raw in lines:
    normalized = raw.lower()
    matching_labels = [
        label for needle, label in servers.items() if needle in normalized
    ]
    for label in matching_labels:
        # Text between the first and second ":" is the repeat count.
        repeat = int(raw.split(":")[1])
        for _ in range(repeat):
            h.add(label)

# ``h`` is rebound to the result of ``get_dict()`` for the rest of the script.
h = h.get_dict()
print(h)

keys = list(h.keys())
values = list(h.values())
X = list(range(len(keys)))
# Count how many lines of "stripped_servers.txt" mention each well-known
# server name and save the counts as a bar chart in "histogram.png".
import matplotlib.pyplot as plt
from hist import Histogram

h = Histogram()

# Single source of truth for the server names that are searched for; the
# matching loop below iterates this list instead of a duplicated literal.
most_used_servers = ["Apache", "nginx", "Oracle", "lighttpd", "Microsoft-IIS"]

with open("stripped_servers.txt", 'r') as f:
    data = f.read().split("\n")

for serv in data:
    for server in most_used_servers:
        # Case-sensitive substring match; a line naming several servers is
        # counted once for each of them.
        if server in serv:
            h.add(server)

# Fetch the dict once so that keys and values come from the same object and
# stay index-aligned.
counts = h.get_dict()
keys = list(counts.keys())
values = list(counts.values())
X = list(range(len(keys)))

plt.bar(X, values, width=1)
plt.xticks(X, keys)
plt.xlabel("Server")
plt.ylabel("Count")
# NOTE(review): ``ax`` is not used afterwards; the call is kept so that the
# figure setup is unchanged -- confirm whether it can be dropped.
ax = plt.subplot(111)
plt.title("Most used servers for BG sites")
plt.savefig("histogram.png")
def has_tld(url, tld): return domain_from_url(url).endswith(tld) def get_html(url): return requests.get(url).text REGISTER = "http://register.start.bg" HEADERS = { "User-Agent": "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0" } visited = set() h = Histogram() links = [ link.get("href") for link in BeautifulSoup(get_html(REGISTER)).find_all("a") ] for link in links: if link is not None and "link.php" in link: try: target_url = REGISTER + "/" + link r = requests.head(target_url, headers=HEADERS, allow_redirects=True, timeout=10) target_url = domain_from_url(r.url)