def list_cars():
    """Scrape the public instance/team-count page into (id, suppliers) pairs.

    NOTE(review): a second list_cars defined later in this module shadows
    this one at import time.
    """
    browser = mechanize.Browser()
    resp = browser.open("http://icfpcontest.org/icfp10/score/instanceTeamCount")
    assert browser.viewing_html()
    soup = BeautifulSoup(resp.read())
    cars = []
    for row in soup.fetch('tr'):
        cells = row.fetch('td')
        if not cells:
            continue  # heading / spacer rows carry no td cells
        suppliers = str(cells[1].contents[0])
        action = row.fetch('form')[0].get('action')
        # the instance id is the numeric path segment of the form action
        car_id = re.search(r"\/(\d+)\/", action).group(1)
        cars.append((car_id, suppliers))
    return cars
def _batches(self, txt):
    """Parse the centred batches table out of *txt*, one batch per data row."""
    page = HTML(txt)
    page.done()
    table = page.html.first('table', {'align': 'Center'})
    data_rows = table('tr')[1:]  # first row is the heading
    return [batch(row('td')[1]) for row in data_rows]
def submit_test_car_fuel(cardata, fuel): br = mechanize.Browser() #br.set_response() res = br.open("http://nfa.imn.htwk-leipzig.de/icfpcont/") #res = br.post("http://nfa.imn.htwk-leipzig.de/icfpcont/") br.select_form(nr=0) br["G0"] = cardata br["G1"] = fuel response = br.submit() body = response.read() # print body bs = BeautifulSoup(body) print bs result = '' for pre in bs.fetch('pre'): result += pre.renderContents() if FAIL_ON_SUBMISSION_ERROR: assert result.find('Good!') != -1, result return result
def fetch_results(): br = mechanize.Browser() response = br.open("http://icfpcontest.org/icfp10/score/teamAll") body = response.read() #body = open('body').read() #print body bs = BeautifulSoup(body) tbody = bs.fetch('table') if len(tbody) < 1: print "Server err" return None tds = tbody[0].fetch('td') results = [] for i in range(0, len(tds)/2): if i < 2: continue val = tds[i*2].renderContents() name = tds[i*2+1].renderContents() if val == '0,000': continue if name == 'TBD': rank = i results.append( (name, val) ) return (rank, results)
def list_cars():
    """Log in, follow the fuel-submission link, and scrape (id, suppliers) pairs."""
    browser = login()
    body = browser.follow_link(text_regex=r".*Submit fuel.*").read()
    soup = BeautifulSoup(body)
    cars = []
    for row in soup.fetch('tr'):
        cells = row.fetch('td')
        if not cells:
            continue  # rows without td cells are headings
        suppliers = str(cells[1].contents[0])
        action = row.fetch('form')[0].get('action')
        # the instance id is the numeric path segment of the form action
        car_id = re.search(r"\/(\d+)\/", action).group(1)
        cars.append((car_id, suppliers))
    return cars
def get_cardata(br, car):
    """Fetch the car-data blob for instance *car*.

    *br* is a logged-in mechanize browser; when None, a fresh login is
    performed.  Returns the second child of the form's div, which holds the
    raw car data on the solve form page.
    """
    if br is None:  # FIX: identity check for the None sentinel, not ==
        br = login()
    res = br.open("http://icfpcontest.org/icfp10/instance/{0}/solve/form".format(car))
    bs = BeautifulSoup(res.read())
    form = bs.fetch('form')[0]
    return form.div.contents[1]
def get_car_data(br, id): tries = 0 while True: try: res = br.open("http://icfpcontest.org/icfp10/instance/{0}/solve/form".format(id)) body = res.read() bs = BeautifulSoup(body) form = bs.fetch('form')[0] data = form.div.contents[1] return data except Exception as exc: tries += 1 if tries > 2: raise print 'Omg! ' + str(exc)
def fetch_stats(br=None):
    """Return (score, cars_solved, cars_submitted) scraped from the front page.

    Logs in with a fresh browser when *br* is None.
    """
    browser = login() if br is None else br
    soup = BeautifulSoup(browser.open("http://icfpcontest.org/icfp10/").read())

    def grab(div_id):
        # each statistic sits in a uniquely-id'd div
        return soup.fetch('div', {'id': div_id})[0].renderContents()

    return (grab('_score_id'), grab('_solution_id'), grab('_instance_id'))
def refresh_ids(br=None): br = mechanize.Browser() data = csv.reader(open('../data/car_ids')) data = list(data) newcars = [] maxid = 0 oldmaxid = 0 for c in data: if len(c) < 2: continue if int(c[0]) > maxid: maxid = int(c[0]) print maxid while(oldmaxid != maxid): print "request ", maxid response = br.open("http://nfa.imn.htwk-leipzig.de/recent_cars/?G0="+str(maxid)) oldmaxid = maxid body = response.read() #body = open('body').read() #print body bs = BeautifulSoup(body) for pre in bs.fetch('pre'): vdata = pre.renderContents() m = re.match(r"\((\d+),.*?\"\;([012]+)\"\;\)", vdata) data = () if m: data = (m.group(1), m.group(2)) newcars.append( data ) if data[0] > maxid: maxid = data[0] cid = open('../data/car_ids', 'a') cdata =open('../data/car_data', 'a') for c in newcars: cid.write("{0}, 0\n".format(c[0])) cdata.write("{0}, {1}\n".format(c[0], c[1])) return newcars
def parse_citmap(doc):
    """Parse a citation-map HTML page into {'citing': [...], 'citedby': [...]}.

    Each list holds unique article numbers (via urltoarnumber) extracted
    from 'articleDetails' links, in first-seen order.
    """
    def get_docs(sp):
        # Collect unique article numbers from detail links under *sp*;
        # returns [] when the container div is missing.
        out = []
        if sp is not None:
            for a in sp.findAll("a"):
                try:
                    if "articleDetails" in a["href"]:
                        up = urltoarnumber(a['href'])
                        if up not in out:
                            out.append(up)
                except Exception:
                    # FIX: was a bare except, which also swallowed
                    # KeyboardInterrupt/SystemExit.  Anchors without an
                    # href (or unparseable ones) are still skipped
                    # best-effort.
                    pass
        return out
    soup = BeautifulSoup(doc)
    citing = get_docs(soup.find("div", {'id': 'colFirst'}))
    citedby = get_docs(soup.find("div", {'id': 'colSecond'}))
    return dict(citing=citing, citedby=citedby)
def fetch_new_ids(br, old_ids): print 'fetching new ids' new_ids = set() for i in range(200): print 'page%d' % i response = br.open("http://icfpcontest.org/icfp10/score/instanceTeamCount?page=%d&size=10" % i) assert br.viewing_html() body = response.read() bs = BeautifulSoup(body) page_ids = set() for tr in bs.fetch('tr'): for td in tr.fetch('td'): if (td.get('style') or '').strip() == 'width: 20%;': page_ids.add(td.contents[0].strip()) if not page_ids: break old_page_ids = page_ids & old_ids page_ids -= old_page_ids new_ids.update(page_ids) if old_page_ids: break # if we see any old ids time.sleep(0.5) print '%d new ids fetched' % len(new_ids) return new_ids
def claims(self, batch):
    """Open the page behind *batch* and return one claim per table data row."""
    log.debug('claims: open (%s)', batch.href)
    page = HTML(self.open(batch.href).get_data())
    page.done()
    table = page.html.first('table', {'align': 'Center'})
    data_rows = table('tr')[1:]  # skip the heading row
    return [claim(row) for row in data_rows]