Пример #1
0
from hist import Histogram
import matplotlib.pyplot as plt

# Lower-case substrings searched for in every line of result.txt, mapped
# to the label under which a match is recorded in the histogram.
servers = {
    "apache": "Apache",
    "nginx": "nginx",
    "iis": "IIS",
    "lighttpd": "lighttpd"
}
h = Histogram()

with open("result.txt", "r") as f:
    lines = f.read().split("\n")

    for line in lines:
        for server, label in servers.items():
            if server not in line.lower():
                continue

            # The text between the first and the second ":" is the count.
            count = int(line.split(":")[1])

            # Record the label once per counted occurrence.
            for _ in range(count):
                h.add(label)

# From here on `h` is whatever Histogram.get_dict() returned, no longer
# the Histogram object itself.
h = h.get_dict()
print(h)
keys = list(h.keys())
values = list(h.values())

# One x position per key.
X = list(range(len(keys)))

plt.bar(X, list(h.values()), align="center")
Пример #2
0

def has_tld(url, tld):
    """Tell whether the result of ``domain_from_url(url)`` ends with *tld*.

    Args:
        url: Value handed to ``domain_from_url``.
        tld: Suffix to look for, e.g. ``".bg"``.

    Returns:
        The result of ``str.endswith`` on the extracted domain.
    """
    domain = domain_from_url(url)
    return domain.endswith(tld)


def get_html(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``. Without a timeout requests
            waits indefinitely, so a stalled server would hang the script.

    Returns:
        The ``text`` attribute of the response.

    Raises:
        requests.exceptions.RequestException: Any error raised by
            ``requests.get`` (including a timeout) propagates to the caller.
    """
    return requests.get(url, timeout=timeout).text

# Address of the page whose anchors are collected and followed.
REGISTER = "http://register.start.bg"
# Headers sent with outgoing requests. The dict key supplies the header
# name, so the value holds only the user-agent string itself.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0"
}

# Domains already seen by the crawl loop.
visited = set()
h = Histogram()
# The href of every <a> element on the REGISTER page; entries may be None,
# which the crawl loop checks for.
links = [
    anchor.get("href")
    for anchor in BeautifulSoup(get_html(REGISTER)).find_all("a")
]


for link in links:
    if link is not None and "link.php" in link:
        try:
            target_url = REGISTER + "/" + link
            r = requests.head(target_url, headers=HEADERS, allow_redirects=True, timeout=10)
            
            target_url = domain_from_url(r.url)
            
            if target_url not in visited:
                visited.add(target_url)

                if has_tld(target_url, ".bg"):
Пример #3
0
from hist import Histogram
import matplotlib.pyplot as plt


# Lower-case substrings searched for in every line of result.txt, mapped
# to the label under which a match is recorded in the histogram.
servers = {
    "apache": "Apache",
    "nginx": "nginx",
    "iis": "IIS",
    "lighttpd": "lighttpd"
}
h = Histogram()

with open("result.txt", "r") as f:
    lines = f.read().split("\n")

    for line in lines:
        lowered = line.lower()
        for server, label in servers.items():
            if server in lowered:
                # The text between the first and the second ":" is the count.
                count = int(line.split(":")[1])

                # Record the label once per counted occurrence.
                for _ in range(count):
                    h.add(label)

# From here on `h` is whatever Histogram.get_dict() returned, no longer
# the Histogram object itself.
h = h.get_dict()
print(h)
keys = list(h.keys())
values = list(h.values())

# One x position per key.
X = list(range(len(keys)))
Пример #4
0
import matplotlib.pyplot as plt
from hist import Histogram


h = Histogram()
# Server names to count. A name is counted for a line when it occurs
# anywhere in that line (substring match, case-sensitive).
most_used_servers = ["Apache", "nginx", "Oracle", "lighttpd", "Microsoft-IIS"]
with open("stripped_servers.txt", 'r') as f:
    data = f.read().split("\n")
for serv in data:
    for server in most_used_servers:
        if server in serv:
            h.add(server)

# Read the counts once so keys and values come from the same mapping.
counts = h.get_dict()
keys = list(counts.keys())
X = list(range(len(keys)))
values = list(counts.values())

# One bar per server name, labelled with that name on the x axis.
plt.bar(X, values, width=1)
plt.xticks(X, keys)
plt.xlabel("Server")
plt.ylabel("Count")
# NOTE(review): `ax` is not used in the visible code; kept so the call's
# effect on the current figure is unchanged.
ax = plt.subplot(111)
plt.title("Most used servers for BG sites")
plt.savefig("histogram.png")
Пример #5
0
def has_tld(url, tld):
    """Tell whether the result of ``domain_from_url(url)`` ends with *tld*.

    Args:
        url: Value handed to ``domain_from_url``.
        tld: Suffix to look for, e.g. ``".bg"``.

    Returns:
        The result of ``str.endswith`` on the extracted domain.
    """
    domain = domain_from_url(url)
    return domain.endswith(tld)


def get_html(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``. Without a timeout requests
            waits indefinitely, so a stalled server would hang the script.

    Returns:
        The ``text`` attribute of the response.

    Raises:
        requests.exceptions.RequestException: Any error raised by
            ``requests.get`` (including a timeout) propagates to the caller.
    """
    return requests.get(url, timeout=timeout).text


# Address of the page whose anchors are collected and followed.
REGISTER = "http://register.start.bg"
# Headers sent with outgoing requests. The dict key supplies the header
# name, so the value holds only the user-agent string itself.
HEADERS = {
    "User-Agent":
    "Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0"
}

# Domains already seen by the crawl loop.
visited = set()
h = Histogram()
# The href of every <a> element on the REGISTER page; entries may be None,
# which the crawl loop checks for.
_anchors = BeautifulSoup(get_html(REGISTER)).find_all("a")
links = [anchor.get("href") for anchor in _anchors]

for link in links:
    if link is not None and "link.php" in link:
        try:
            target_url = REGISTER + "/" + link
            r = requests.head(target_url,
                              headers=HEADERS,
                              allow_redirects=True,
                              timeout=10)

            target_url = domain_from_url(r.url)