示例#1
0
 def get_user_agents(self):
     """Collect user-agent strings from useragentstring.com's browser page.

     The page is fetched through ``souper.get_soup`` and the text of every
     ``<li>`` element it contains is returned as a list, in the order
     ``findAll`` yields them.
     """
     listing_url = (
         'http://www.useragentstring.com/pages/useragentstring.php?typ=Browser'
     )
     soup = souper.get_soup(listing_url)
     return [item.text for item in soup.findAll('li')]
def get_fighter_url_exts(index_url):
    """Return the link target of every table row on the index page.

    The page at ``index_url`` is loaded with ``get_soup``; each ``<tr>`` found
    under ``soup.tbody`` contributes the ``href`` of its ``a`` element.
    No guarding is done here: a page without a ``tbody`` or a row without a
    link raises.
    """
    rows = get_soup(index_url).tbody.findAll('tr')
    return [row.a['href'] for row in rows]
def get_fighter_matches_tr(url):
    """Return the ``<tr>`` rows found under the ``tbody`` of the page at ``url``.

    This is a best-effort lookup: an empty list is returned when ``get_soup``
    raises or when the parsed page has no ``tbody``.
    """
    try:
        soup = get_soup(url)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still propagate so the scraper can be stopped.
        return []

    tbody = soup.tbody
    # Identity test rather than ``!=`` so the check cannot be affected by any
    # comparison operators the soup object may define.
    if tbody is None:
        return []
    return tbody.findAll('tr')
def get_fighter_info(url):
    """Return the fighter's name read from the ``h1`` heading of the page.

    Returns:
        ``None`` when ``get_soup`` raises. Otherwise the heading's string
        encoded as UTF-8, or ``''`` when reading it raises ``AttributeError``
        (presumably because the page has no ``h1`` or the heading has no
        single string -- confirm against the parser in use).
    """
    try:
        soup = get_soup(url)
    except Exception:
        return None

    try:
        return soup.h1.string.encode('utf-8')
    except AttributeError:
        # Call form of print: same output on Python 2, and no longer a
        # SyntaxError on Python 3.
        print('AttributeError')
        return ''
示例#5
0
 def get_proxy_dict(self):
     """Build a ``{'<cell0>:<cell1>': bool}`` mapping from free-proxy-list.net.

     For every row in the body of each table with id ``proxylisttable``, the
     key is the text of the row's first two ``<td>`` cells joined by ``':'``
     (presumably IP and port), and the value is ``True`` when the text of the
     row's first ``<td class="hx">`` cell is ``'yes'``, else ``False``.
     """
     soup = souper.get_soup('https://free-proxy-list.net/')
     proxies = {}
     for table in soup.findAll('table', {'id': 'proxylisttable'}):
         for tr in table.tbody.findAll('tr'):
             leading = [cell.text for cell in tr.findAll('td')[:2]]
             https_flag = tr.findAll('td', {'class': 'hx'})[0].text
             address = leading[0] + ':' + leading[1]
             proxies[address] = (https_flag == 'yes')
     return proxies
示例#6
0
def read_url(url):
    """Scrape a listing page and return its unique ``[beds, baths, rent]`` rows.

    The page is fetched with ``souper.get_soup``; beds/baths come from
    ``get_beds_bathrooms`` and rents from ``get_rents``, matched up by index.
    A rent whose string form contains ``'pp'`` (presumably "per person" --
    confirm with ``get_rents``) is converted to a float total by stripping
    ``'pp'`` and multiplying by the bed count.

    Progress and the resulting table are printed to stdout.

    Returns:
        A list of ``[beds, baths, rent]`` lists with duplicates removed,
        in first-seen order.
    """
    print('Reading url:')
    print(url)
    print('-------------------')
    soup = souper.get_soup(url)
    beds, baths = get_beds_bathrooms(soup)
    rents = get_rents(soup)

    # array of [bed, bath, rent]
    arr = []
    for indx, bed in enumerate(beds):
        rent = rents[indx]
        if 'pp' in str(rent):
            # Normalise BEFORE the duplicate check: entries already stored in
            # ``arr`` hold the converted float, so comparing the raw 'pp'
            # string against them would never match and duplicates would
            # slip through.
            rent = float(rent.replace('pp', '')) * bed
        entry = [bed, baths[indx], rent]
        if entry not in arr:
            arr.append(entry)
    print('Beds\tBaths\tRent')
    for i in arr:
        print('%s\t%s\t%s' %(i[0], i[1], i[2]))
    return arr