class check_the_mangas():
    def __init__(self, manga_name, db_conn):
        self.db_conn = db_conn
        self.manga_name = manga_name
        self.manga_oldnumber = sqlite_manager.get_manga_chapter(db_conn, manga_name)
        self.manga_nownumber = self.manga_oldnumber
        self.manga_olddate = sqlite_manager.get_manga_date(db_conn, manga_name)
        self.nowdate = self.today_date()
        self.br = URLopener()

    def today_date(self):
        return subprocess.check_output(["date", "+%a-%b-%e"]).replace("\n", "")

    # return 1 if the connection is working
    def test_connection(self):
        try:
            response = self.br.open(configuration.WEBSITE_TO_CHECK_CONNECTION).read()
            if configuration.KEYWORD in response:
                return 1
            else:
                return 0
        except:
            print "manga connection"
            return 0

    def exec_cmd(self):
        pid = os.fork()
        os.umask(0)
        os.system(configuration.MANGA_NEW_CMD.replace("MANGA", self.manga_name))

    def run(self):
        if self.test_connection():
            last_chapter = False
            try:
                while last_chapter == False:
                    to_open = "http://www.mangareader.net/" + self.manga_name + "/" + str(int(self.manga_nownumber) + 1)
                    response = self.br.open(to_open).read()
                    if "is not released yet" in response or "not published yet" in response or response == "":
                        last_chapter = True
                        if self.manga_nownumber != sqlite_manager.get_manga_chapter(self.db_conn, self.manga_name):
                            print self.manga_name + ":" + self.manga_nownumber + ":" + self.nowdate
                            sqlite_manager.update_manga(self.db_conn, self.manga_name,
                                                        self.manga_nownumber, self.nowdate)
                    else:
                        self.manga_nownumber = str(int(self.manga_nownumber) + 1)
            except Exception, e:
                if "is not released yet. If you liked" in response:
                    if self.manga_nownumber != sqlite_manager.get_manga_chapter(self.db_conn, self.manga_name):
                        print self.manga_name + ":" + self.manga_nownumber + ":" + self.nowdate
                        sqlite_manager.update_manga(self.db_conn, self.manga_name,
                                                    self.manga_nownumber, self.nowdate)
                pass
class check_the_mangas():
    def __init__(self, manga_name):
        self.manga_name = manga_name
        self.myfile = open(configuration.DATA_FILE, 'r').read()
        self.manga_oldnumber = self.get_number()
        self.manga_nownumber = self.manga_oldnumber
        self.manga_olddate = self.get_date()
        self.nowdate = self.today_date()
        self.br = URLopener()

    def get_number(self):
        return re.findall(self.manga_name + ':([0-9]+):', self.myfile)[0]

    def get_date(self):
        return re.findall(self.manga_name + ":" + str(self.manga_oldnumber) + ':(.*)\n', self.myfile)[0]

    def today_date(self):
        return subprocess.check_output(["date", "+%a-%b-%e"]).replace("\n", "")

    # return 1 if the connection is working
    def test_connection(self):
        try:
            response = self.br.open(configuration.WEBSITE_TO_CHECK_CONNECTION).read()
            if configuration.KEYWORD in response:
                return 1
            else:
                return 0
        except:
            print "manga connection"
            return 0

    def exec_cmd(self):
        pid = os.fork()
        os.umask(0)
        os.system(configuration.MANGA_NEW_CMD.replace("MANGA", self.manga_name))

    def run(self):
        if self.test_connection():
            last_chapter = False
            try:
                while last_chapter == False:
                    to_open = "http://www.mangareader.net/" + self.manga_name + "/" + str(int(self.manga_nownumber) + 1)
                    response = self.br.open(to_open).read()
                    if "is not released yet" in response or "not published yet" in response or response == "":
                        last_chapter = True
                        if self.manga_name + ":" + str(self.manga_nownumber) not in open(configuration.DATA_FILE, "r").read():
                            Thread(target=self.exec_cmd).start()
                            configuration.backup()
                            open(configuration.DATA_FILE, 'w').write(
                                open(configuration.DATA_FILE + ".bak", "r").read().replace(
                                    self.manga_name + ":" + str(self.manga_oldnumber) + ":" + self.manga_olddate,
                                    self.manga_name + ":" + str(self.manga_nownumber) + ":" + self.nowdate))
                    else:
                        print "not last chapter"
                        self.manga_nownumber = str(int(self.manga_nownumber) + 1)
            except Exception, e:
                print e
                print "manga run"
                if "is not released yet. If you liked" in response:
                    if self.manga_name + ":" + str(self.manga_nownumber) not in open(configuration.DATA_FILE, "r").read():
                        configuration.backup()
                        open(configuration.DATA_FILE, 'w').write(
                            open(configuration.DATA_FILE + ".bak", "r").read().replace(
                                self.manga_name + ":" + str(self.manga_oldnumber) + ":" + self.manga_olddate,
                                self.manga_name + ":" + str(self.manga_nownumber) + ":" + self.nowdate))
                pass
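# Usage sketch (assumption, not part of the original sources): both
# check_the_mangas variants expose run() as their only entry point, so a small
# driver just builds one checker per tracked title and calls run() on each.
# The titles below are placeholders; the file-backed variant directly above
# only needs the manga name, while the sqlite-backed variant additionally
# takes an open database connection.
if __name__ == "__main__":
    for name in ["one_piece", "bleach"]:
        checker = check_the_mangas(name)  # file-backed variant defined above
        checker.run()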
def unshortenurl(short):
    from urllib import URLopener
    opener = URLopener()
    try:
        opener.open(short)
    except IOError, e:
        f = e
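# Hedged completion sketch (my reading, not confirmed by the truncated snippet
# above): the base urllib.URLopener defines no redirect handlers, so opening a
# shortened link that answers with a 301/302 raises
# IOError('http error', errcode, errmsg, headers).  If that is the behaviour
# the author relied on, the expanded URL can be read from the Location header
# of the captured error.  The function name below is mine.
def unshortenurl_sketch(short):
    from urllib import URLopener
    opener = URLopener()
    try:
        opener.open(short)
    except IOError, e:
        # e.args is ('http error', errcode, errmsg, headers) for HTTP errors
        if len(e.args) == 4 and e.args[1] in (301, 302, 303, 307):
            return e.args[3].getheader('Location')
    return short  # not a redirect, or nothing usable in the error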
class Updater:
    def __init__(self, server, infoFile):
        """
        takes a server location and an info file as parameters in the constructor
        it will use this server to fetch the new information
        there should be a json/version and json/info.json dir on this server
        """
        self._infoFile = infoFile
        self._serverJSON = server + self._infoFile
        self._serverDate = server + "json/version"
        if sys.version < '3':
            self.br = URLopener()
        else:
            self.br = request

    def hasNewInfo(self):
        """
        hasNewInfo :: Boolean
        compare the local version tag with the one found on the server
        and returns true if the server version is newer
        """
        jsonDate = open(location_manager.VERSION, 'r').read().strip()
        if sys.version < '3':
            servDate = self.br.open(self._serverDate).read().strip()
        else:
            servDate = self.br.urlopen(self._serverDate).read().strip()
        return (int(jsonDate) < int(servDate))

    def generateTimeStamp(self):
        """
        generateTimeStamp :: String
        returns a string that is used to timestamp old config backup files
        """
        return open(location_manager.VERSION, 'r').read().strip()

    def fetchNewInfo(self):
        """
        fetchNewInfo :: Void
        it will download the info file from the server
        use the timestamp to back it up and overwrite it
        """
        # Fetching server's info.json
        if sys.version < '3':
            response = self.br.open(self._serverJSON).read()
        else:
            response = self.br.urlopen(self._serverJSON).read().decode("utf-8")
        oldInfo = open(self._infoFile, 'r').read()
        open(self._infoFile + "." + self.generateTimeStamp(), 'w').write(oldInfo)
        open(self._infoFile, 'w').write(response)
        # Fetching server's version
        if sys.version < '3':
            servDate = int(self.br.open(self._serverDate).read().strip())
        else:
            servDate = int(self.br.urlopen(self._serverDate).read().strip())
        open(location_manager.VERSION, 'w').write(str(servDate))
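# Usage sketch (assumption, not part of the original sources): the intended
# call sequence appears to be "check, then fetch".  The server URL and info
# file path below are placeholders, and location_manager.VERSION must point
# at a readable local version file for hasNewInfo() to work.
if __name__ == "__main__":
    updater = Updater("http://example.org/", "json/info.json")
    if updater.hasNewInfo():
        updater.fetchNewInfo()  # backs up the old info file, then overwrites it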
def connection():
    try:
        br = URLopener()
        response = br.open(configuration.WEBSITE_TO_CHECK_CONNECTION).read()
        if configuration.KEYWORD in response:
            return 1
        else:
            return 0
    except:
        return 0
def utGrabFromUrl(p_url):
    """ Takes a file from a remote server """
    from urllib import URLopener
    try:
        l_opener = URLopener()
        l_file = l_opener.open(p_url)
        ctype = l_file.headers['Content-Type']
        data = l_file.read()  # read the body before closing the opener
        l_opener.close()
        return (data, ctype)
    except:
        return (None, 'text/x-unknown-content-type')
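# Usage sketch (assumption, not from the original sources): utGrabFromUrl
# returns (body, content_type) and falls back to
# (None, 'text/x-unknown-content-type') on any failure, so callers should
# check the body before using it.  The URL and output filename are placeholders.
data, ctype = utGrabFromUrl("http://example.org/logo.png")
if data is not None:
    open("logo.png", "wb").write(data)
    print "fetched %d bytes of %s" % (len(data), ctype)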
class Updater:
    """
    takes a server location and an info file as parameters in the constructor
    it will use this server to fetch the new information
    there should be a /hash and /info.json dir on this server
    """

    def __init__(self, server, infoFile):
        self._server = server
        self._infoFile = infoFile
        self.br = URLopener()

    def hasNewInfo(self):
        """
        hasNewInfo :: Boolean
        compare the local info file hash with the one found on the server
        and returns true if they are different
        """
        f = open(self._infoFile, 'r').read()
        m = md5.new(f).hexdigest()
        response = self.br.open(self._server + '/hash').read()
        response = response.replace("\n", "")
        return (m != response)

    def generateTimeStamp(self):
        """
        generateTimeStamp :: String
        returns a string that is used to timestamp old config backup files
        """
        return (str(time.gmtime().tm_year) + "_" + str(time.gmtime().tm_mday) + "_" +
                str(time.gmtime().tm_hour) + "_" + str(time.gmtime().tm_min))

    def fetchNewInfo(self):
        """
        fetchNewInfo :: Void
        it will download the info file from the server
        use the timestamp to back it up and overwrite it
        """
        response = self.br.open(self._server + '/info.json').read()
        oldInfo = open(self._infoFile, 'r').read()
        open(self._infoFile + "." + self.generateTimeStamp(), 'w').write(oldInfo)
        open(self._infoFile, 'w').write(response)
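# Server-side sketch (assumption, not part of the original sources): for
# hasNewInfo() above to agree with the local check, the /hash endpoint has to
# serve the plain hex md5 of the current info.json; a trailing newline is fine
# because the client strips newlines before comparing.  File paths are
# placeholders.
import md5

def publish_hash(info_path="info.json", hash_path="hash"):
    digest = md5.new(open(info_path, 'r').read()).hexdigest()
    open(hash_path, 'w').write(digest + "\n")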
def call_remote(self, category, params):
    '''
    The meetup api is set up such that the root url does not change much
    other than the 'name' of the thing you call into. In other words, I can
    just use category to sprintf my way to a valid url, then tack on the rest
    of the query string specified in params.
    '''
    url = self.root_url
    url = url % (category)
    # Every call has to include key
    url = url + "?" + params + "&key=" + self.key
    client = URLopener()
    request = client.open(url)
    raw_str = request.read()
    results = json.loads(raw_str)
    # Let the caller interpret the results of the call. Both the
    # meta info and the results are passed back
    return results
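# Hypothetical owner class (assumption, not shown in the original snippet):
# call_remote is written as a method, so the object it lives on must provide
# self.root_url (a format string with one %s slot for the category) and
# self.key (the API key).  A minimal sketch of that owner, with placeholder
# values for the URL template and key:
class MeetupClient(object):
    def __init__(self, key, root_url="https://api.meetup.com/%s"):
        self.root_url = root_url
        self.key = key

    call_remote = call_remote  # bind the function above as the method

# usage: results = MeetupClient("MY_API_KEY").call_remote("2/events", "group_urlname=some-group")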
memoHeadings = {}
posCount = 0
for elem in courtElems:
    if not elem.text:
        continue
    country = normalStr(elem.text)
    if country == "Congo RDC":
        continue
    strIO = cStringIO.StringIO()
    urlStub = elem.attrib['href']
    if urlStub == '/wlg/courts/nofr/usstates/lxctusa.htm':
        continue
    if urlStub == '/wlg/courts/nofr/oeur/lxctjap.htm':
        continue
    print country
    countryHtml = urlh.open(siteRoot + urlStub).read()
    options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0)
    countryHtml = tidy.parseString(countryHtml, **options)
    countryHtml.write(strIO)
    strIO.seek(0)
    countryHtml = strIO.read()
    strIO.close()
    countryHtml = re.sub('xmlns="[^"]+"', "", countryHtml)
    countryDoc = etree.fromstring(countryHtml)
    courtHeadingElems = countryDoc.xpath("//font[@color='#009944']")
    for e in courtHeadingElems:
        heading = normalStr(e.text)
        #if not allHeadings.has_key(heading):
        #    posCount += 1
        #    continue
class mangareader_downloader(object):
    def __init__(self, manga_name, chapter, end_chapter, manga_location, dl_manager):
        self.manga_location = manga_location
        self.manga_name = manga_name
        self.chapter = chapter
        self.end_chapter = end_chapter
        self.flag = False
        self.current_image = "000"
        self.img = ""
        self.next_link = ""
        self.current_page = "http://www.mangareader.net/" + self.manga_name + "/" + self.chapter + "/"
        self.next_regex = "<span class=\"next\"><a href=\"([^\"]*)\">Next</a></span>"
        self.nb_of_pages = 0
        self.page_counter = 2
        self.br = URLopener()
        self.response = ""
        self.response_lines = ""
        self.dl_manager = dl_manager
        self.resolved = {
            'http://i0': '188.132.173.122', 'http://i1': '188.132.173.3',
            'http://i2': '188.132.173.6', 'http://i3': '188.132.173.9',
            'http://i4': '188.132.173.12', 'http://i5': '188.132.173.15',
            'http://i6': '188.132.173.18', 'http://i7': '188.132.173.21',
            'http://i8': '188.132.173.24', 'http://i9': '188.132.173.27',
            'http://i10': '188.132.173.30', 'http://i11': '188.132.173.33',
            'http://i12': '188.132.173.36', 'http://i13': '188.132.173.39',
            'http://i14': '188.132.173.42', 'http://i15': '188.132.173.45',
            'http://i16': '188.132.173.48', 'http://i17': '188.132.173.51',
            'http://i18': '188.132.173.54', 'http://i19': '188.132.173.57',
            'http://i20': '188.132.173.60', 'http://i21': '188.132.173.63',
            'http://i22': '188.132.173.66', 'http://i23': '188.132.173.69',
            'http://i24': '188.132.173.72', 'http://i25': '188.132.173.75',
            'http://i26': '188.132.173.78', 'http://i27': '188.132.173.81',
            'http://i28': '188.132.173.84', 'http://i29': '188.132.173.87',
            'http://i30': '188.132.173.90', 'http://i31': '188.132.173.93',
            'http://i32': '188.132.173.96', 'http://i33': '188.132.173.99',
            'http://i34': '188.132.173.126', 'http://i35': '188.132.173.129',
            'http://i36': '188.132.173.132', 'http://i37': '188.132.173.135',
            'http://i38': '188.132.173.138', 'http://i39': '188.132.173.141',
            'http://i40': '188.132.173.144', 'http://i41': '188.132.173.200',
            'http://i1000': '188.132.173.200', 'http://i999': '188.132.173.12',
            'http://i998': '188.132.173.48', 'http://i997': '188.132.173.72',
            'http://i996': '188.132.173.96', 'http://i995': '188.132.173.144',
            'http://i994': '188.132.173.200'
        }

    def increase_current(self):
        self.current_image = str(int(self.current_image) + 1)
        if len(self.current_image) == 1:
            self.current_image = "00" + self.current_image
        elif len(self.current_image) == 2:
            self.current_image = "0" + self.current_image
        self.page_counter += 1

    def increase_chapter(self):
        self.nb_of_pages = 0
        self.page_counter = 1
        self.chapter = str(int(self.chapter) + 1)
        self.current_image = "000"
        self.next_link = "http://www.mangareader.net/" + self.manga_name + "/" + self.chapter + "/" + str(self.page_counter)
        self.page_counter += 1

    def check_chapter_end(self):
        if self.page_counter - 1 == self.nb_of_pages:
            return True
        else:
            return False

    def not_published(self):
        if "is not published yet. Once" in self.response or self.chapter == str(int(self.end_chapter) + 1):
            return True
        return False

    def go_to_next_page(self):
        if not self.check_chapter_end():
            self.increase_current()
        else:
            self.increase_chapter()
        self.current_page = self.next_link

    def scrap_page(self):
        if self.nb_of_pages == 0:
            for a in self.response_lines:
                if "</select> of " in a:
                    self.nb_of_pages = int(re.findall("</select> of (\d+)", a)[0])
                    break
        for a in self.response_lines:
            if '"><img id=\"img\"' in a:
                self.img = re.findall("src=\"([^\"]*)\" alt", a)[0]
                break
        self.next_link = "http://www.mangareader.net/" + self.manga_name + "/" + self.chapter + "/" + str(self.page_counter)

    def manage_chapters(self):
        if not os.path.exists(self.manga_location):
            os.mkdir(self.manga_location)
        os.chdir(self.manga_location)
        if not os.path.exists(self.manga_name):
            os.mkdir(self.manga_name)
        os.chdir(self.manga_name)
        if not os.path.exists(self.manga_name + "-" + self.chapter):
            os.mkdir(self.manga_name + "-" + self.chapter)
        os.chdir(self.manga_name + "-" + self.chapter)

    def download_image(self):
        self.manage_chapters()
        caching = self.img.split('.')[0]
        if caching in self.resolved:
            self.img = self.img.replace(caching + ".mangareader.net", "http://" + self.resolved[caching])
        if self.dl_manager == 'default':
            urlretrieve(self.img, self.current_image + '.jpg')
        else:
            status = 1
            while int(status) != 0:
                status = os.system(self.dl_manager + " " + self.img + " -o " + self.current_image + ".jpg")
        print "[*] Image saved to " + os.getcwd() + "/" + self.current_image + ".jpg"

    def start_downloading(self):
        try:
            self.response = self.br.open(self.current_page).read()
            self.response_lines = self.response.split("\n")
            if not self.not_published():
                self.scrap_page()
                self.manage_chapters()
                self.download_image()
                self.go_to_next_page()
            else:
                self.flag = True
        except Exception, e:
            print e
            time.sleep(2)
            self.start_downloading()
We need to liaise with Shima-san about contributing to the data set.
'''
import os, sys, re
from urllib import URLopener
from lxml import etree
import json

obj = {}
urlh = URLopener()
html = urlh.open("http://en.wikipedia.org/wiki/List_of_supreme_courts_by_country").read()
doc = etree.fromstring(html)
entries = doc.xpath("//table[@class='wikitable']//tr")
stops = ["the", "of", "a"]


def makeID(court):
    courtID = court.lower()
    courtID = re.split("\s+", courtID)
    for i in range(len(courtID) - 1, -1, -1):
        word = courtID[i]
        if word in stops:
            courtID = courtID[0:i] + courtID[i + 1:]
    return ".".join(courtID)
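# Worked example (not in the original script, but follows directly from the
# code above): makeID lowercases the court name, splits it on whitespace,
# drops the stop words "the", "of" and "a", and joins the rest with dots.
print makeID("Supreme Court of the United States")  # -> supreme.court.united.states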
#!/usr/bin/env python
from re import sub
from BeautifulSoup import BeautifulSoup
from urllib import URLopener

opener = URLopener()
html = opener.open('http://www.dailyzen.com/').read()
html = html[html.index('<!--Add Quote for correct day-->'):]
html1 = html[:html.index('<br>')]
html2 = html[html.index('<A class="artist">'):]
html2 = html2[:html2.index('</a></i>')]
html2 = sub('<A class="artist">', '', html2).strip()

zen = BeautifulSoup(html1)
zen = zen.prettify().strip()
for x in ['<!--Add Quote for correct day-->', '<br />', '<p>', '</p>', '^\n', '\n$']:
    zen = sub(x, '', zen).strip()
zen = sub('\n \n \n', '\n \n', zen).strip()

print
print zen
print
print '\t\t', html2
def test_ping_play1():
    from urllib import URLopener
    u = URLopener()
    text = "<title>pypy.js various demos</title>"
    assert u.open("http://play1.pypy.org/").read().find(text) != -1
def open(self, *args):
    f = URLopener.open(self, *args)
    return XML(f)
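# Context sketch (assumption, not shown in the snippet above): open() is
# written as an override of URLopener.open that feeds the response into an
# XML() parser, so the surrounding class is presumably a URLopener subclass
# along these lines.  Here XML is taken to be xml.etree.ElementTree.XML
# applied to the response body; the class name is mine.
from urllib import URLopener
from xml.etree.ElementTree import XML


class XMLOpener(URLopener):
    def open(self, *args):
        f = URLopener.open(self, *args)
        return XML(f.read())  # parse the body and hand back an Element

# usage: root = XMLOpener().open("http://example.org/feed.xml")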