def list_cars():
    """Scrape the public instance/team-count page into (id, suppliers) pairs.

    NOTE(review): a second list_cars defined later in this module shadows
    this one at import time.
    """
    browser = mechanize.Browser()
    resp = browser.open("http://icfpcontest.org/icfp10/score/instanceTeamCount")
    assert browser.viewing_html()
    soup = BeautifulSoup(resp.read())
    cars = []
    for row in soup.fetch('tr'):
        cells = row.fetch('td')
        if not cells:
            continue  # heading / spacer rows carry no td cells
        suppliers = str(cells[1].contents[0])
        action = row.fetch('form')[0].get('action')
        # the instance id is the numeric path segment of the form action
        car_id = re.search(r"\/(\d+)\/", action).group(1)
        cars.append((car_id, suppliers))
    return cars
def _batches(self, txt):
    """Parse the centred batches table out of *txt*, one batch per data row."""
    page = HTML(txt)
    page.done()
    table = page.html.first('table', {'align': 'Center'})
    data_rows = table('tr')[1:]  # first row is the heading
    return [batch(row('td')[1]) for row in data_rows]
def submit_test_car_fuel(cardata, fuel): br = mechanize.Browser() #br.set_response() res = br.open("http://nfa.imn.htwk-leipzig.de/icfpcont/") #res = br.post("http://nfa.imn.htwk-leipzig.de/icfpcont/") br.select_form(nr=0) br["G0"] = cardata br["G1"] = fuel response = br.submit() body = response.read() # print body bs = BeautifulSoup(body) print bs result = '' for pre in bs.fetch('pre'): result += pre.renderContents() if FAIL_ON_SUBMISSION_ERROR: assert result.find('Good!') != -1, result return result
def fetch_results(): br = mechanize.Browser() response = br.open("http://icfpcontest.org/icfp10/score/teamAll") body = response.read() #body = open('body').read() #print body bs = BeautifulSoup(body) tbody = bs.fetch('table') if len(tbody) < 1: print "Server err" return None tds = tbody[0].fetch('td') results = [] for i in range(0, len(tds)/2): if i < 2: continue val = tds[i*2].renderContents() name = tds[i*2+1].renderContents() if val == '0,000': continue if name == 'TBD': rank = i results.append( (name, val) ) return (rank, results)
def list_cars():
    """Log in, follow the fuel-submission link, and scrape (id, suppliers) pairs."""
    browser = login()
    body = browser.follow_link(text_regex=r".*Submit fuel.*").read()
    soup = BeautifulSoup(body)
    cars = []
    for row in soup.fetch('tr'):
        cells = row.fetch('td')
        if not cells:
            continue  # rows without td cells are headings
        suppliers = str(cells[1].contents[0])
        action = row.fetch('form')[0].get('action')
        # the instance id is the numeric path segment of the form action
        car_id = re.search(r"\/(\d+)\/", action).group(1)
        cars.append((car_id, suppliers))
    return cars
def get_cardata(br, car):
    """Fetch the car-data blob for instance *car*.

    *br* is a logged-in mechanize browser; when None, a fresh login is
    performed.  Returns the second child of the form's div, which holds the
    raw car data on the solve form page.
    """
    if br is None:  # FIX: identity check for the None sentinel, not ==
        br = login()
    res = br.open("http://icfpcontest.org/icfp10/instance/{0}/solve/form".format(car))
    bs = BeautifulSoup(res.read())
    form = bs.fetch('form')[0]
    return form.div.contents[1]
def get_car_data(br, id): tries = 0 while True: try: res = br.open("http://icfpcontest.org/icfp10/instance/{0}/solve/form".format(id)) body = res.read() bs = BeautifulSoup(body) form = bs.fetch('form')[0] data = form.div.contents[1] return data except Exception as exc: tries += 1 if tries > 2: raise print 'Omg! ' + str(exc)
def fetch_stats(br=None):
    """Return (score, cars_solved, cars_submitted) scraped from the front page.

    Logs in with a fresh browser when *br* is None.
    """
    browser = login() if br is None else br
    soup = BeautifulSoup(browser.open("http://icfpcontest.org/icfp10/").read())

    def grab(div_id):
        # each statistic sits in a uniquely-id'd div
        return soup.fetch('div', {'id': div_id})[0].renderContents()

    return (grab('_score_id'), grab('_solution_id'), grab('_instance_id'))
def refresh_ids(br=None): br = mechanize.Browser() data = csv.reader(open('../data/car_ids')) data = list(data) newcars = [] maxid = 0 oldmaxid = 0 for c in data: if len(c) < 2: continue if int(c[0]) > maxid: maxid = int(c[0]) print maxid while(oldmaxid != maxid): print "request ", maxid response = br.open("http://nfa.imn.htwk-leipzig.de/recent_cars/?G0="+str(maxid)) oldmaxid = maxid body = response.read() #body = open('body').read() #print body bs = BeautifulSoup(body) for pre in bs.fetch('pre'): vdata = pre.renderContents() m = re.match(r"\((\d+),.*?\"\;([012]+)\"\;\)", vdata) data = () if m: data = (m.group(1), m.group(2)) newcars.append( data ) if data[0] > maxid: maxid = data[0] cid = open('../data/car_ids', 'a') cdata =open('../data/car_data', 'a') for c in newcars: cid.write("{0}, 0\n".format(c[0])) cdata.write("{0}, {1}\n".format(c[0], c[1])) return newcars
def parse_citmap(doc):
    """Parse a citation-map HTML page into {'citing': [...], 'citedby': [...]}.

    Each list holds unique article numbers (via urltoarnumber) extracted
    from 'articleDetails' links, in first-seen order.
    """
    def get_docs(sp):
        # Collect unique article numbers from detail links under *sp*;
        # returns [] when the container div is missing.
        out = []
        if sp is not None:
            for a in sp.findAll("a"):
                try:
                    if "articleDetails" in a["href"]:
                        up = urltoarnumber(a['href'])
                        if up not in out:
                            out.append(up)
                except Exception:
                    # FIX: was a bare except, which also swallowed
                    # KeyboardInterrupt/SystemExit.  Anchors without an
                    # href (or unparseable ones) are still skipped
                    # best-effort.
                    pass
        return out
    soup = BeautifulSoup(doc)
    citing = get_docs(soup.find("div", {'id': 'colFirst'}))
    citedby = get_docs(soup.find("div", {'id': 'colSecond'}))
    return dict(citing=citing, citedby=citedby)
def fetch_new_ids(br, old_ids): print 'fetching new ids' new_ids = set() for i in range(200): print 'page%d' % i response = br.open("http://icfpcontest.org/icfp10/score/instanceTeamCount?page=%d&size=10" % i) assert br.viewing_html() body = response.read() bs = BeautifulSoup(body) page_ids = set() for tr in bs.fetch('tr'): for td in tr.fetch('td'): if (td.get('style') or '').strip() == 'width: 20%;': page_ids.add(td.contents[0].strip()) if not page_ids: break old_page_ids = page_ids & old_ids page_ids -= old_page_ids new_ids.update(page_ids) if old_page_ids: break # if we see any old ids time.sleep(0.5) print '%d new ids fetched' % len(new_ids) return new_ids
def claims(self, batch):
    """Open the page behind *batch* and return one claim per table data row."""
    log.debug('claims: open (%s)', batch.href)
    page = HTML(self.open(batch.href).get_data())
    page.done()
    table = page.html.first('table', {'align': 'Center'})
    data_rows = table('tr')[1:]  # skip the heading row
    return [claim(row) for row in data_rows]