Exemplos de getHTML em Python, exemplos de html.getHTML em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: crawler.py Projeto: banana42/sqlivulscan.

def crawl(url):
    """Collect parameterised links found on the page at ``url``.

    The page is fetched with ``html.getHTML(url, lastURL=True)``. Every
    ``<a href="...">`` value that looks like a dynamic page (``.php?``,
    ``.asp?``, ``.aspx?`` or ``.jsp?`` followed by a ``key=value`` pair) and
    that passes ``parameterControl`` is collected. Links that do not start
    with ``http``/``www`` are prefixed with a base derived from the final URL.

    The collected list is also stored in the module-level ``links`` variable.

    Returns:
        The list of collected links (empty when the page body is falsy), or
        ``None`` when fetching the page raised an exception.
    """
    global links

    links = []

    try:
        result, URL = html.getHTML(url, lastURL=True)
    except Exception:
        # Best effort: a page that cannot be fetched yields no links.
        # ``Exception`` (not a bare except) lets Ctrl-C still interrupt.
        return None

    if not result:
        return links

    # Base used for relative links: everything up to the last path segment
    # of the final URL, always ending with a slash.
    parts = URL.split('/')
    if len(parts) >= 4:
        domain = 'http://' + '/'.join(parts[2:-1]) + '/'
    else:
        domain = URL.rstrip('/') + '/'

    for link in re.findall('<a href="(.*?)"', result):
        # www.example.com/index.(php|asp|aspx|jsp)?query=1
        if not re.search(r'\.(php|aspx?|jsp)\?.*?=', link):
            continue
        # Strict comparison kept on purpose: the return type of
        # parameterControl is not visible from this function.
        if not parameterControl(link) == True:
            continue

        if link.startswith(("http", "www")) or domain in urlparse(link).path:
            links.append(link)
        else:
            # ``domain`` already ends with "/", so drop the link's own
            # leading slash instead of producing "//".
            links.append(domain + link.lstrip("/"))

    return links

Exemplo n.º 2

0

Exibir arquivo

Arquivo: serverinfo.py Projeto: banana42/sqlivulscan.

def __getServerInfo(url):
    """Look up web-server information for the host of ``url``.

    The host is sent to ``https://aruljohn.com/webserver/<host>`` and the
    returned HTML is parsed with BeautifulSoup. For every table row that
    contains a ``<td class="title">`` cell, the text of the row's second
    cell is collected.

    Returns:
        A list of the collected cell texts (presumably server name and
        version - depends on the remote page layout), or ``['', '']`` when
        the response cannot be parsed or the page reports an error
        (``<p class="err">``).
    """
    parsed = urlparse(url)
    # Without a scheme urlparse puts the host into ``path``.
    host = parsed.netloc if parsed.netloc != '' else parsed.path.split("/")[0]

    # Exceptions from the fetch (including KeyboardInterrupt) propagate.
    result = html.getHTML("https://aruljohn.com/webserver/" + host)

    try:
        soup = bs4.BeautifulSoup(result, "lxml")
    except Exception:
        return ['', '']

    if soup.findAll('p', {"class": "err"}):
        return ['', '']

    info = []  # to store server info
    for row in soup.findAll('tr'):
        if row.findAll('td', {"class": "title"}):
            cells = row.findAll('td')
            # Skip malformed rows that lack a value cell.
            if len(cells) > 1:
                info.append(cells[1].text.rstrip('\r'))

    return info

Exemplo n.º 3

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getPlayers(playerID):
    """Scrape the name and country shown on an HLTV player page.

    Returns:
        ``[playerName, playerCountry, playerID]``, or ``[]`` when the page
        could not be fetched or either field was not found in the HTML.
    """
    html = getHTML("https://www.hltv.org/player/%s/a" % (playerID))
    if html is None:
        print("Failed for %s" % (playerID))
        return []

    # Player name, taken from the "Complete statistics for <name></a>" text
    playerName = re.findall('Complete statistics for.*</a>', html)
    if not playerName:
        return []

    # Player country, taken from the flag image's title attribute
    playerCountry = re.findall('class="flag" title=".*"> ', html)
    if not playerCountry:
        return []

    # Both lists are non-empty here, so only the first hit needs trimming.
    name = playerName[0].replace("Complete statistics for ", "").replace("</a>", "")
    country = playerCountry[0].replace('class="flag" title="', "").replace('"> ', "")

    # Make an array for pool.map to process
    return [name, country, playerID]

Exemplo n.º 4

0

Exibir arquivo

def scan(url):
    """Check ``url`` for SQL injection by breaking one parameter at a time.

    For each query parameter, a single quote is appended to that parameter
    only, the resulting URL is fetched with ``html.getHTML`` and the response
    is passed to ``sqlerrors.check``.

    Returns:
        ``True`` as soon as one response is flagged by ``sqlerrors.check``;
        ``False`` when the URL has no query string or nothing was flagged.
    """
    domain = url.split("?")[0]  # domain with path without queries
    queries = urlparse(url).query.split("&")

    # no queries in url
    if not any(queries):
        return False

    for index, query in enumerate(queries):
        injected = queries[:index] + [query + "'"] + queries[index + 1:]
        # Join by position: comparing each item to the last one by value
        # drops the "&" whenever an earlier parameter equals the last one.
        website = domain + "?" + "&".join(injected)

        result = html.getHTML(website)
        if result and sqlerrors.check(result):
            return True

    return False

Exemplo n.º 5

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getTeams(teamID):
    """Scrape the name and country shown on an HLTV team page.

    Returns:
        ``[teamName, teamCountry, teamID]``, or ``[]`` when the page could
        not be fetched or either field was not found in the HTML.
    """
    html = getHTML("https://www.hltv.org/team/%s/a" % (teamID))
    if html is None:
        print("Failed for %s" % (teamID))
        return []

    # Team name
    teamName = re.findall('<div><span class="subjectname">.*</span><br><i', html)
    if not teamName:
        return []

    # Team country. A second attempt with the stricter ".*</div>" suffix is
    # pointless: anything it matches is already matched by ".*<".
    teamCountry = re.findall('fa fa-map-marker" aria-hidden="true"></i>.*<', html)
    if not teamCountry:
        return []

    # Both lists are non-empty here, so only the first hit needs trimming.
    name = teamName[0].replace('<div><span class="subjectname">', "").replace("</span><br><i", "")
    country = teamCountry[0].replace('fa fa-map-marker" aria-hidden="true"></i> ', "").split("<", 1)[0]

    # Make an array for pool.map to process
    return [name, country, teamID]

Exemplo n.º 6

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getMatchLineups(matchID):
    """Return the first ten player IDs linked on a match page, plus matchID.

    The result is ``[id0, ..., id9, matchID]``. An empty list is returned
    when the page cannot be fetched, when no player links are found, or when
    15 or fewer player links are present (treated as a layout change).
    """
    page = getHTML("https://www.hltv.org/matches/%s" % (matchID))
    if page is None:
        print("Failed for %s" % (matchID))
        return []

    anchors = re.findall('<a href=\"/player/.*/', page)
    if not anchors:
        # Nothing that looks like a player link on the page
        print("%s failed, no players detected" % (matchID))
        return []

    # '<a href="/player/<id>/...' -> third "/"-separated piece is the ID
    ids = [anchor.split("/")[2] for anchor in anchors]

    if len(ids) <= 15:
        print("HLTV altered lineup layout for %s" % (matchID))
        return []

    # Make an array for pool.map to process
    return ids[:10] + [matchID]

Exemplo n.º 7

0

Exibir arquivo

Arquivo: getMatchIDs.py Projeto: nmwalsh/HLTV-Scraper

def findMatchIDsAtURL(url):
    """Extract the match identifiers linked from the page at ``url``.

    Returns:
        A list with, for each match link found, the part of the link after
        ``/matches/``. Empty when the page could not be fetched (``getHTML``
        returned ``None``) or contains no match links.
    """
    # Get the HTML using getHTML()
    html = getHTML(url)
    if html is None:
        # re.findall would raise TypeError on None; report it the same way
        # the other scrapers do and return nothing.
        print("Failed for %s" % (url))
        return []

    # Create an array of all of the Match URLs on the page
    rawMatches = re.findall('"(.*?000"><a href="/matches/.*?)"', html)

    # Keep only what follows the second "/" of each raw hit
    return [raw.split('/', 2)[-1] for raw in rawMatches]

Exemplo n.º 8

0

Exibir arquivo

Arquivo: helper.py Projeto: nmwalsh/HLTV-Scraper

def getNewIterableItems(page, startID):
    """Probe consecutive IDs after ``startID`` until one fails to load.

    Requests ``https://www.hltv.org/<page>/<id>/a`` for startID + 1,
    startID + 2, ... and stops at the first ID for which ``getHTML`` returns
    ``None``. Progress is written to stdout on a single, rewritten line.

    Returns the list of IDs that loaded successfully.
    """
    print("Checking for new %ss. This may take awhile." % (page))
    found = []
    candidate = startID
    while True:
        candidate += 1
        if getHTML("https://www.hltv.org/%s/%s/a" % (page, candidate)) is None:
            break
        # '\r' returns to the start of the line so the counter overwrites itself
        sys.stdout.write('\r' + "New %s found: %s" % (page, candidate))
        sys.stdout.flush()
        found.append(candidate)

    print("\nFound %s new %ss." % (len(found), page))
    return found

Exemplo n.º 9

0

Exibir arquivo

def __checkSQLi(url):
    """Check ``url`` for SQL injection by breaking all parameters at once.

    A single quote is appended to every query parameter, the resulting URL is
    fetched with ``html.getHTML`` and the response is passed to
    ``sqlerrors.check``. Progress is reported through the project's ``io``
    helpers.

    Returns:
        ``True`` when the response is flagged by ``sqlerrors.check``;
        ``False`` when the URL has no query string or nothing was flagged.
    """
    io.stdout("scanning {}".format(url), end="")

    domain = url.split("?")[0]  # domain with path without queries
    queries = urlparse(url).query.split("&")

    # no queries in url
    if not any(queries):
        return False

    website = domain + "?" + ("&".join([param + "'" for param in queries]))
    result = html.getHTML(website)
    if result and sqlerrors.check(result):
        io.showsign(" vulnerable")
        return True

    # Move cursor to new line. The call form prints the same empty line on
    # Python 2 and, unlike the print statement, is valid on Python 3.
    print("")
    return False

Exemplo n.º 10

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getMatchEvents(matchID):
    """Find the event a match belongs to.

    Returns:
        ``[matchID, eventID]`` where ``eventID`` is the path segment that
        follows ``/events/`` in the first event link on the match page, or
        ``[]`` when the page could not be fetched or has no event link.
    """
    html = getHTML("https://www.hltv.org/matches/%s" % (matchID))
    if html is None:
        print("Failed for %s" % (matchID))
        return []

    # Find the first link to an event page
    eventName = re.findall('"/events/.*/', html)
    if not eventName:
        print("Failed %s" % (matchID))
        return []

    # Clean the hit whenever there is at least one. Requiring more than one
    # hit left a single hit as raw '"/events/...' text in the result.
    eventID = eventName[0].replace('"/events/', "").split("/", 1)[0]

    # Make an array for pool.map to process
    return [matchID, eventID]

Exemplo n.º 11

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getEventNames(eventID):
    """Scrape type, name and end date of an event from its results page.

    Returns ``[eventType, eventName, eventEndDate, eventID]``. A name or end
    date that cannot be found is reported as ``0``. An empty list is returned
    when the page cannot be fetched or no event type is found.
    """
    page = getHTML("https://www.hltv.org/results?offset=0&event=%s" % (eventID))
    if page is None:
        print("Failed for %s" % (eventID))
        return []

    # The event type is mandatory; without it nothing is returned
    typeHits = re.findall(' <div class=\".*text-ellipsis\">', page)
    if not typeHits:
        return []
    nameHits = re.findall('text-ellipsis\">.*<', page)
    dateHits = re.findall('class="standard-headline">.*<', page)

    kind = typeHits[0].replace(" <div class=\"", "")
    kind = kind.replace(" text-ellipsis\">", "")

    # Name and end date fall back to 0 when missing
    name = 0
    if nameHits:
        name = nameHits[0].replace("text-ellipsis\">", "").replace("<", "")

    endDate = 0
    if dateHits:
        endDate = dateHits[0].replace("class=\"standard-headline\">", "").replace("<", "")

    # Make an array for pool.map to process
    return [kind, name, endDate, eventID]

Exemplo n.º 12

0

Exibir arquivo

Arquivo: scanner.py Projeto: moxyas/sqliv2

    try:
        while True:
	@@ -40,13 +40,13 @@ def callback(result, url=url):
        pool.join()

    for url, result in results.items():
        if result[0] == True:
            vulnerables.append((url, result[1]))

    return vulnerables


def __sqli(url):
    """check SQL injection vulnerability"""

    io.stdout("scanning {}".format(url), end="")
	@@ -59,10 +59,12 @@ def __checkSQLi(url):
        return False

    website = domain + "?" + ("&".join([param + "'" for param in queries]))
    source = html.getHTML(website)
    if source:
        vulnerable, db = sqlerrors.check(source)
        if vulnerable and db != None:
            io.showsign(" vulnerable")
            return True, db

    print ""  # move cursor to new line
    return False, None

Exemplo n.º 13

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getPlayerStats(matchID):
    """Scrape per-map player statistics from an HLTV match page.

    Returns a list of rows, ten per map, each of the form
    ``[map, playerID, kills, deaths, adr, kast, rating, matchID]``.
    An empty list is returned when the page cannot be fetched or when maps,
    player IDs, K/D values or ratings are missing. Missing ADR or KAST values
    are replaced by empty strings.
    """
    page = getHTML("https://www.hltv.org/matches/%s" % (matchID))
    if page is None:
        print("Failed for %s" % (matchID))
        return []

    # Maps: taken from the id of each stats-content block, digits stripped
    mapHits = re.findall('<div class=\"stats-content\" id=\".*-content\">', page)
    if not mapHits:
        print("No player stats for %s" % (matchID))
        return []
    dropDigits = {ord(k): None for k in digits}
    maps = [
        hit.replace("<div class=\"stats-content\" id=\"", "").replace("-content\">", "").translate(dropDigits)
        for hit in mapHits
    ]
    # The first stats block is discarded (presumably the all-maps summary)
    del maps[0]

    # Player IDs
    playerHits = re.findall('href=\"/player/.*/', page)
    if not playerHits:
        print("No player IDs for %s" % (matchID))
        return []
    players = [hit.replace("href=\"/player/", "").replace("/", "") for hit in playerHits]

    # Kills / deaths, stored on the page as "K-D"
    kdHits = re.findall('<td class=\"kd text-center\">.*</td>', page)
    if not kdHits:
        print("No player K/D for %s" % (matchID))
        return []
    kills = []
    deaths = []
    for hit in kdHits:
        pair = hit.replace("<td class=\"kd text-center\">", "").replace("</td>", "")
        dash = pair.find('-')
        kills.append(pair[0:dash])
        deaths.append(pair[dash+1:len(pair)])
    # Drop the column headers ("K-D") that the pattern also picks up
    deaths = [value for value in deaths if value != 'D']
    kills = [value for value in kills if value != 'K']

    # ADR: optional, padded with blanks when absent
    adrHits = re.findall('<td class=\"adr text-center \">.*</td>', page)
    if adrHits:
        adr = [hit.replace("<td class=\"adr text-center \">", "").replace("</td>", "") for hit in adrHits]
    else:
        print("No player ADR for %s" % (matchID))
        adr = [""] * 70

    # KAST%: optional, padded with blanks when absent
    kastHits = re.findall('<td class=\"kast text-center\">.*</td>', page)
    if kastHits:
        kast = [hit.replace("<td class=\"kast text-center\">", "").replace("%</td>", "") for hit in kastHits]
    else:
        print("No player KAST ratio for %s" % (matchID))
        kast = [""] * 70

    # Rating: mandatory
    ratingHits = re.findall('<td class=\"rating text-center\">.*</td>', page)
    if not ratingHits:
        print("No player Rating for %s" % (matchID))
        return []
    rating = [hit.replace("<td class=\"rating text-center\">", "").replace("</td>", "") for hit in ratingHits]
    # Drop the 'Rating' column headers
    rating = [value for value in rating if value != 'Rating']

    # Build one row per player and map. The flat stat lists hold ten entries
    # per stats block, and the first block was discarded above, so map i
    # starts at index 10 * (i + 1).
    masterArray = []
    for mapIndex, mapName in enumerate(maps):
        offset = 10 * (mapIndex + 1)
        for slot in range(offset, offset + 10):
            masterArray.append([
                mapName,
                players[slot],
                kills[slot],
                deaths[slot],
                adr[slot],
                kast[slot],
                rating[slot],
                matchID,
            ])
    return masterArray

Exemplo n.º 14

0

Exibir arquivo

Arquivo: scraper.py Projeto: nmwalsh/HLTV-Scraper

def getMatchInfo(matchID):
    """Scrape date, maps, teams, sides and scores from an HLTV match page.

    A row has the form::

        [date, map, team1ID, team1Side, t1[0], t1[1], t1[2], t1[3],
         team2ID, team2Side, t2[0], t2[1], t2[2], t2[3], matchID]

    where the numbers found in a map's score block alternate between the two
    teams (even positions for team 1, odd positions for team 2). When only
    six numbers are found, two zeros are appended (no overtime).

    Returns:
        A single flat row when one (or no) map name was found, a list of
        rows (one per score block) when several map names were found, or
        ``[]`` when the page could not be fetched or its layout is not the
        expected one.
    """
    html = getHTML("https://www.hltv.org/matches/%s" % (matchID))
    if html is None:
        print("Failed for %s" % (matchID))
        return []

    # Raw hits for every field of interest
    date = re.findall('data-unix=".*"', html)
    teamIDs = re.findall('src="https://static.hltv.org/images/team/logo/.*" class', html)
    teamNames = re.findall('class="logo" title=".*">', html)
    mapHits = re.findall('<div class="mapname">.*</div>', html)
    scores = re.findall('<div class="results"><span class=".*</span><span>', html)

    # Give up if no team names found
    if not teamNames:
        return []

    # Match date: the page stores milliseconds, so drop the last 3 digits
    if date:
        stamp = date[0].replace('data-unix="', "").replace('"', "")[:-3]
        matchDate = datetime.utcfromtimestamp(int(stamp)).strftime('%Y-%m-%d')
    else:
        matchDate = 0

    # Team IDs: every row needs two of them, so fewer is a layout problem
    # (indexing teamIDs[1] would otherwise raise IndexError).
    if len(teamIDs) < 2:
        print("HLTV altered team layout for %s" % (matchID))
        return []
    logoPrefix = 'src="https://static.hltv.org/images/team/logo/'
    teamIDs = [hit.replace(logoPrefix, "").replace('" class', "") for hit in teamIDs[:2]]

    # Map name(s); a single placeholder 0 when none was found
    if mapHits:
        maps = [hit.replace('<div class="mapname">', "").replace("</div>", "") for hit in mapHits]
    else:
        maps = [0]

    # Starting sides: two entries per score block
    if not scores:
        return []
    sides = []
    for block in scores:
        markers = re.findall('"t"|"ct"', block)
        if not markers:
            print("HLTV altered score layout for %s" % (matchID))
            return []
        sides.extend(["T", "CT"] if markers[0] == '"t"' else ["CT", "T"])

    # Numbers of each score block; only the first block matters for a
    # single-map match.
    multiMap = len(maps) > 1
    parsed = [re.findall(r'\d+', block) for block in (scores if multiMap else scores[:1])]
    for numbers in parsed:
        if len(numbers) == 6:
            # If there was no overtime, make the OT values 0
            numbers.extend([0, 0])
        elif len(numbers) < 8:
            # Fewer than six numbers, or an odd seventh one: unknown layout
            print("HLTV altered score layout for %s" % (matchID))
            return []
    if multiMap and len(parsed) > len(maps):
        # More score blocks than map names cannot be paired up
        print("HLTV altered score layout for %s" % (matchID))
        return []

    def buildRow(mapName, numbers):
        # NOTE(review): every row uses sides[0]/sides[1], i.e. the sides of
        # the first score block, even for later maps - confirm intended.
        return [
            matchDate, mapName,
            teamIDs[0], sides[0], numbers[0], numbers[2], numbers[4], numbers[6],
            teamIDs[1], sides[1], numbers[1], numbers[3], numbers[5], numbers[7],
            matchID,
        ]

    # Make an array for pool.map to process
    if multiMap:
        # Each map's stats are contained in their own array
        return [buildRow(maps[i], numbers) for i, numbers in enumerate(parsed)]
    return buildRow(maps[0], parsed[0])