def run_strip_scripts(source):
    """Return ``source`` as lower-cased markup with all <script> elements removed.

    The input string is lower-cased *before* it is parsed, so the returned
    text is lower-case as well (tag names, attributes and page text alike).

    Args:
        source: markup text to clean.

    Returns:
        ``str()`` of the parsed document after every ``script`` element has
        been extracted from it.
    """
    document = BeautifulSoup(source.lower())
    # extract() detaches the element from the tree, so it no longer appears
    # when the document is serialised below.
    for script_tag in document.findAll('script'):
        script_tag.extract()
    return str(document)
def myfunc(i):
    """Download ten consecutive xkcd comics, starting at comic number ``i``.

    For every number in ``range(i, i + 10)`` the page
    ``http://xkcd.com/<number>/`` is fetched, the second ``<img>`` element on
    that page is used as the comic image, and the image bytes are written to
    a file named ``"<number> - <alt text>"`` (relative to the current working
    directory).

    Comics 404 and 472 are skipped: 404 doesn't work and 472 has CSS tags
    that cause errors.

    Args:
        i: number of the first comic to download.

    Raises:
        IndexError: if a page contains fewer than two ``<img>`` elements.
        Errors raised by ``urllib2`` or by file I/O propagate unchanged; the
        HTTP responses and the output file are closed before they do.
    """
    skipped = (404, 472)
    # One opener is enough for all image downloads.
    opener = urllib2.build_opener()

    # A separate loop name keeps the parameter ``i`` from being shadowed.
    for number in range(i, i + 10):
        if number in skipped:
            continue

        response = urllib2.urlopen("http://xkcd.com/" + str(number) + "/")
        try:
            page = response.read()
        finally:
            response.close()

        images = BeautifulSoup(page).findAll('img')
        comic = images[1]  # second <img> on the page
        picture_url = comic['src']

        picture_response = opener.open(picture_url)
        try:
            my_picture = picture_response.read()
        finally:
            picture_response.close()

        filename = str(number) + ' - ' + comic['alt']
        # ``with`` closes the file even when the write fails part-way.
        with open(filename, "wb") as fout:
            fout.write(my_picture)
        print("Writing to " + filename)
# Scrape roll number / name pairs from the Anna University result pages and
# store them in the ``marks`` table of the SQLite database ``test.db``.
#
# To prompt for the range instead of hard-coding it:
# first = int(raw_input('Enter first roll: '))
# last = int(raw_input('Enter last roll: '))
first = 12008106001
last = 12008106111
last += 1  # range() excludes its stop value, so step one past the last roll

conn = sqlite3.connect('test.db')
try:
    f = conn.cursor()
    try:
        # IF NOT EXISTS: a plain CREATE TABLE raises OperationalError when
        # ``marks`` already exists (e.g. on a second run against test.db).
        f.execute('create table if not exists marks (roll text, name text)')
        for i in range(first, last):
            j = str(i)
            response = urllib2.urlopen(
                "http://result.annauniv.edu/cgi-bin/result/result10gr.pl?regno=" + j)
            try:
                page = response.read()
            finally:
                response.close()
            soup = BeautifulSoup(page)
            # The roll number and the name are read from the first two
            # <font color="Brown"> elements on the page.
            results = soup.findAll('font', attrs={'color': "Brown"})
            if len(results) < 2:
                # Indexing would raise IndexError and abort the whole run;
                # report the roll number and move on instead.
                print(j + " - no result found, skipping")
                continue
            roll = results[0].renderContents()
            name = results[1].renderContents()
            print("%s  -  %s" % (roll, name))
            t = (roll, name)
            f.execute('insert into marks values (?,?)', t)
            # Commit per row so rows already fetched survive a later failure.
            conn.commit()
    finally:
        f.close()
finally:
    # The connection is closed even if a request or insert fails.
    conn.close()
# Fetch the result page for every roll number in [first, last] and record the
# (roll, name) pair found on it in the ``marks`` table of ``test.db``.
#
# Interactive alternative to the hard-coded range below:
# first = int(raw_input('Enter first roll: '))
# last = int(raw_input('Enter last roll: '))
first = 12008106001
last = 12008106111
last += 1  # make the upper bound inclusive for range()

conn = sqlite3.connect('test.db')
try:
    f = conn.cursor()
    try:
        # Without IF NOT EXISTS this statement raises OperationalError as
        # soon as ``marks`` is already present in test.db.
        f.execute('create table if not exists marks (roll text, name text)')
        for i in range(first, last):
            j = str(i)
            response = urllib2.urlopen(
                "http://result.annauniv.edu/cgi-bin/result/result10gr.pl?regno=" + j)
            try:
                page = response.read()
            finally:
                response.close()
            soup = BeautifulSoup(page)
            # First brown <font> element -> roll number, second -> name.
            results = soup.findAll('font', attrs={'color': "Brown"})
            if len(results) < 2:
                # A page without both fields used to stop the script with an
                # IndexError; skip that roll number and keep going.
                print(j + " - no result found, skipping")
                continue
            roll = results[0].renderContents()
            name = results[1].renderContents()
            print("%s  -  %s" % (roll, name))
            t = (roll, name)
            f.execute('insert into marks values (?,?)', t)
            # Committing after each insert keeps finished rows on disk.
            conn.commit()
    finally:
        f.close()
finally:
    # Release the database connection on success and on failure alike.
    conn.close()