def load_from_url(url):
    """ If the URL starts with 'http:', load a BT .torrent or Tribler .tstream
    file from the URL and convert it into a TorrentDef. If the URL starts
    with our URL scheme, we convert the URL to a URL-compatible TorrentDef.
    @param url URL
    @return TorrentDef.
    """
    # Class method, no locking required
    if url.startswith(P2PURL_SCHEME):
        (metainfo, swarmid) = makeurl.p2purl2metainfo(url)

        # Metainfo created from URL, so create URL compatible TorrentDef.
        metainfo['info']['url-compat'] = 1

        # For testing EXISTING LIVE: ENABLE, for old EXISTING MERKLE: DISABLE
        #metainfo['info']['name.utf-8'] = metainfo['info']['name']

        t = TorrentDef._create(metainfo)
        return t
    else:
        f = urlOpenTimeout(url)
        return TorrentDef._read(f)
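# A usage sketch for load_from_url above; both URLs are made up, and the
# "tribe" prefix is an assumption about what P2PURL_SCHEME expands to.
def _example_load_from_url():
    # Plain HTTP: the .torrent/.tstream is downloaded and parsed.
    tdef = load_from_url("http://example.com/sample.tstream")
    # P2P URL: the metainfo is reconstructed from the URL itself and the
    # resulting TorrentDef is marked url-compatible (url-compat = 1).
    #tdef = load_from_url("tribe://host/name?...")
    return tdef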
def vod_event_callback(d, event, params):
    if event == VODEVENT_START:
        stream = params["stream"]

        # SWIFTPROC
        if stream is None:
            # Access swift HTTP interface directly
            stream = urlOpenTimeout(params["url"], timeout=30)

        # ARNOSMPTODO: available()
        grandtotal = 0L
        st = time.time()
        while True:
            global RATE
            total = 0
            while total < int(RATE):
                data = stream.read(int(RATE))
                if not data:
                    return  # EOF: stop instead of spinning on empty reads
                total += len(data)
            grandtotal += total
            et = time.time()
            diff = max(et - st, 0.00001)
            grandrate = float(grandtotal) / diff
            print >>sys.stderr, "bitbucket: grandrate", grandrate, "~", RATE  # ,"avail",stream.available()
            time.sleep(1.0)
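# The inner loop above asks for int(RATE) bytes on every read(), so a single
# second can over-read its quota. A self-contained sketch of the same
# cumulative-average arithmetic that reads only the remainder (the function
# name and the fake stream are for illustration only):
import time
from StringIO import StringIO

def measure_rate(stream, rate, seconds):
    grandtotal = 0
    st = time.time()
    for _ in xrange(seconds):
        total = 0
        while total < rate:
            data = stream.read(rate - total)  # read only the remainder
            if not data:
                # EOF: report the average over the time actually spent
                return float(grandtotal + total) / max(time.time() - st, 0.00001)
            total += len(data)
        grandtotal += total
    return float(grandtotal) / max(time.time() - st, 0.00001)

#print measure_rate(StringIO("x" * 500000), 100000, 5)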
def getStatus(announce, url, info_hash, info_hashes):
    returndict = {info_hash: (0, 0)}
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()
        response_dict = bdecode(response)
        for cur_infohash, status in response_dict["files"].iteritems():
            seeder = max(0, status["complete"])
            leecher = max(0, status["incomplete"])
            returndict[cur_infohash] = (seeder, leecher)

        registerSuccess(announce)
        return returndict
    except IOError:
        registerIOError(announce)
        return {info_hash: (-1, -1)}
    except KeyError:
        try:
            if "flags" in response_dict:  # may be interval problem
                if "min_request_interval" in response_dict["flags"]:
                    return {info_hash: (-3, -3)}
        except:
            pass
    except:
        pass
    return None
def getStatus(announce, url, info_hash, info_hashes):
    returndict = {}
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()
        response_dict = bdecode(response)
        for cur_infohash, status in response_dict["files"].iteritems():
            seeder = max(0, status["complete"])
            leecher = max(0, status["incomplete"])
            returndict[cur_infohash] = (seeder, leecher)

        registerSuccess(announce)
        return returndict
    except IOError:
        registerIOError(announce)
        return {info_hash: (-1, -1)}
    except KeyError:
        try:
            if "flags" in response_dict:  # may be interval problem
                if "min_request_interval" in response_dict["flags"]:
                    return {info_hash: (-3, -3)}
        except:
            pass
    except:
        pass
    return None
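# Both getStatus variants above expect `url` to be a tracker scrape URL. By
# the BitTorrent scrape convention this is derived from the announce URL by
# replacing the final path component "announce" with "scrape"; a sketch of
# that derivation (the helper name is ours):
def announce_to_scrape(announce_url):
    parts = announce_url.split('/')
    if parts and parts[-1].startswith('announce'):
        parts[-1] = parts[-1].replace('announce', 'scrape', 1)
        return '/'.join(parts)
    return None  # tracker does not advertise scrape support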
def reopen(self):
    while True:
        try:
            self.stream = urlOpenTimeout(self.url)  # 30 sec timeout
            break
        except:
            print_exc()
            time.sleep(5.0)  # No exp. backoff, get back ASAP
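# reopen() above deliberately retries every 5 seconds with no backoff (see
# its comment). Where hammering the server is a concern, a capped exponential
# backoff is the usual alternative; a sketch under the same interface:
def reopen_with_backoff(self, max_delay=60.0):
    delay = 1.0
    while True:
        try:
            self.stream = urlOpenTimeout(self.url)
            break
        except:
            print_exc()
            time.sleep(delay)
            delay = min(delay * 2.0, max_delay)  # 1, 2, 4, ... capped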
def _refresh(self):
    channel_url = None
    try:
        self.key_url_lock.acquire()
        channel_url = deepcopy(self.key_url)
    finally:
        self.key_url_lock.release()

    if channel_url:
        for key, urls in channel_url.iteritems():
            if key in self.key_callbacks:
                for url in urls:
                    if DEBUG:
                        print >>sys.stderr, "RssParser: getting rss", url, len(urls)

                    historyfile = self.gethistfilename(url, key)
                    urls_already_seen = URLHistory(historyfile)
                    urls_already_seen.read()

                    newItems = self.readUrl(url, urls_already_seen)
                    for title, new_urls, description, thumbnail in newItems:
                        for new_url in new_urls:
                            urls_already_seen.add(new_url)
                            urls_already_seen.write()

                            try:
                                if DEBUG:
                                    print >>sys.stderr, "RssParser: trying", new_url

                                referer = urlparse(new_url)
                                referer = referer.scheme + "://" + referer.netloc + "/"
                                stream = urlOpenTimeout(new_url, referer=referer)
                                bdata = stream.read()
                                stream.close()

                                bddata = bdecode(bdata, 1)
                                torrent = TorrentDef._create(bddata)

                                def processCallbacks(key):
                                    for callback in self.key_callbacks[key]:
                                        try:
                                            callback(key, torrent, extraInfo={'title': title, 'description': description, 'thumbnail': thumbnail})
                                        except:
                                            print_exc()

                                if self.remote_th.is_registered():
                                    callback = lambda key=key: processCallbacks(key)
                                    self.remote_th.save_torrent(torrent, callback)
                                else:
                                    processCallbacks(key)
                            except:
                                if DEBUG:
                                    print >>sys.stderr, "RssParser: could not download", new_url

                            time.sleep(RSS_CHECK_FREQUENCY)
def parse(self):
    self.title2entrymap = {}
    print >>sys.stderr, "feedp: Parsing", self.feedurl
    stream = urlOpenTimeout(self.feedurl, 10)
    self.tree = etree.parse(stream)
    entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
    for entry in entries:
        titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
        #print >>sys.stderr,"feedp: Got title",titleelement.text
        self.title2entrymap[titleelement.text] = entry
def load_from_url(url):
    """ Load a BT .torrent or Tribler .tribe file from the URL and convert
    it into a TorrentDef.
    @param url URL
    @return TorrentDef.
    """
    # Class method, no locking required
    f = urlOpenTimeout(url)
    return TorrentDef._read(f)
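# TorrentDef._read only needs a file-like object, so the same code path can
# presumably serve a local file as well; a sketch (the path is made up):
def load_from_file_example(path="/tmp/sample.torrent"):
    f = open(path, "rb")
    try:
        return TorrentDef._read(f)
    finally:
        f.close()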
def parse(self):
    self.feedurls = []
    stream = urlOpenTimeout(self.metafeedurl, 10)
    self.tree = etree.parse(stream)
    entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
    for entry in entries:
        titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
        linkelement = entry.find('{http://www.w3.org/2005/Atom}link')
        if linkelement is not None:
            # .get avoids a KeyError on <link> elements without a type attribute
            if linkelement.attrib.get('type') == 'application/atom+xml':
                # Got feed
                feedurl = linkelement.attrib['href']
                self.feedurls.append(feedurl)
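# A sketch of how the two Atom parsers fit together: the metafeed parse()
# above collects per-channel feed URLs that the feed-level parse() (the
# title2entrymap builder earlier) then consumes. The class names here are
# hypothetical, chosen only to show the flow:
#
#   metaparser = MetaFeedParser(metafeedurl)
#   metaparser.parse()
#   for feedurl in metaparser.feedurls:
#       feedparser = FeedParser(feedurl)
#       feedparser.parse()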
def getIPInfoByURL(url, proxy=None):
    """ Get IP location by visiting an IP lookup page """
    # TODO: getIPInfoByURL with proxy support
    # Known urls: http://www.hostip.info/api/get.html?ip=xxx&position=true
    #             http://www.melissadata.com/Lookups/iplocation.asp?ipaddress=xxx&submit=submit (uses the IP2Location database, no coordinates)
    try:
        file = timeouturlopen.urlOpenTimeout(url, timeout=2)
        ip_info = file.read()
    except:
        if DEBUG:
            print >>sys.stderr, "ipinfo: getIPInfoByURL failed: cannot access", url
        raise Exception
    return ip_info
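# Usage sketch built from the known URL listed in the comment above; the
# return value is whatever plain text hostip.info serves:
def lookup_ip_location(ip):
    url = "http://www.hostip.info/api/get.html?ip=%s&position=true" % ip
    return getIPInfoByURL(url)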
def hasNewVersion(self):
    my_version = self.utility.getVersion()
    try:
        # Arno: TODO: don't let this be done by MainThread
        curr_status = urlOpenTimeout('http://tribler.org/version/').readlines()
        line1 = curr_status[0]
        if len(curr_status) > 1:
            self.update_url = curr_status[1].strip()
        else:
            self.update_url = 'http://tribler.org/'

        _curr_status = line1.split()
        self.curr_version = _curr_status[0]
        return self.newversion(self.curr_version, my_version)
    except:
        print_exc()
        return False
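# hasNewVersion() implies the format of http://tribler.org/version/: the
# first line starts with the latest version string, and an optional second
# line carries the update URL. self.newversion is not shown here; a minimal
# sketch of a dotted-version comparison that would fit (an assumption, not
# the real implementation):
def newversion_sketch(curr_version, my_version):
    def parse(v):
        return [int(x) for x in v.split('.') if x.isdigit()]
    return parse(curr_version) > parse(my_version)  # e.g. "5.2.1" > "5.2.0"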
def getStatus(url, info_hash):
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()
    except IOError:
        return (-1, -1)  # unknown
    except AttributeError:
        return (-2, -2)  # dead

    try:
        response_dict = bdecode(response)
    except:
        return (-2, -2)  # dead, undecodable response

    try:
        status = response_dict["files"][info_hash]
        seeder = max(0, status["complete"])
        leecher = max(0, status["incomplete"])
    except KeyError:
        try:
            if "flags" in response_dict:  # may be interval problem
                if "min_request_interval" in response_dict["flags"]:
                    return (-3, -3)
        except:
            pass
        return (-2, -2)  # dead

    return (seeder, leecher)
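# Callers have to tell the sentinel pairs apart from real counts; this is
# the decoding implied by the comments in getStatus above:
def describe_status(status):
    seeder, leecher = status
    if status == (-1, -1):
        return "unknown (I/O error)"
    if status == (-2, -2):
        return "dead tracker or undecodable response"
    if status == (-3, -3):
        return "rate limited (min_request_interval)"
    return "%d seeders, %d leechers" % (seeder, leecher)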
def loadMetadata(self, data):
    """ Called by non-GUI thread """
    if DEBUG:
        print >>sys.stderr, "subip: ThumbnailViewer: loadMetadata: url", data['url']
    mimetype = None
    bmpdata = None
    if 'persistent' not in data:
        try:
            t = urlparse.urlparse(data['url'])
            #print >>sys.stderr,"subip: ThumbnailViewer: loadMetadata: parsed url",t
            # t[0] is the scheme and t[1] the netloc: fetch the site favicon
            newurl = t[0] + '://' + t[1] + '/' + 'favicon.ico'
            if DEBUG:
                print >>sys.stderr, "subip: ThumbnailViewer: loadMetadata: newurl", newurl
            stream = urlOpenTimeout(newurl, timeout=5)
            mimetype = 'image/x-ico'  # 'image/vnd.microsoft.icon' # 'image/ico'
            bmpdata = stream.read()
            stream.close()
        except:
            print_exc()

    wx.CallAfter(self.metadata_thread_gui_callback, data, mimetype, bmpdata)
def _rerequest_single(self, t, s, l, callback):
    if prctlimported:
        prctl.set_name("Tribler" + currentThread().getName())
    try:
        closer = [None]

        def timedout(self=self, l=l, closer=closer):
            if self.lock.trip(l):
                self.errorcodes['troublecode'] = 'Problem connecting to tracker - timeout exceeded'
                self.lock.unwait(l)
            try:
                closer[0]()
            except:
                pass

        self.externalsched(timedout, self.timeout)

        err = None
        try:
            if DEBUG:
                print >>sys.stderr, "Rerequest tracker:"
                print >>sys.stderr, merge_announce(t, s)
            h = urlOpenTimeout(merge_announce(t, s))
            closer[0] = h.close
            data = h.read()
        except (IOError, error), e:
            err = 'Problem connecting to tracker - ' + str(e)
            if DEBUG:
                print_exc()
        except:
            err = 'Problem connecting to tracker'
            if DEBUG:
                print_exc()

        #if DEBUG:
        #    print >>sys.stderr,"rerequest: Got data",data

        try:
            h.close()
        except:
            pass

        if err:
            if self.lock.trip(l):
                self.errorcodes['troublecode'] = err
                self.lock.unwait(l)
            return

        if not data:
            if self.lock.trip(l):
                self.errorcodes['troublecode'] = 'no data from tracker'
                self.lock.unwait(l)
            return

        try:
            r = bdecode(data, sloppy=1)
            if DEBUG:
                print >>sys.stderr, "Rerequester: Tracker returns:", r
            check_peers(r)
            #print >>sys.stderr,"Rerequester: Tracker returns, post check done"
        except ValueError, e:
            if DEBUG:
                print_exc()
            if self.lock.trip(l):
                self.errorcodes['bad_data'] = 'bad data from tracker - ' + str(e)
                self.lock.unwait(l)
            return

        # (excerpt ends here; the handler for the enclosing try follows in
        # the original source)
def refresh(self):
    """Returns a generator for a list of (title, urllib2openedurl_to_torrent)
    pairs for this feed. TorrentFeedReader instances keep a list of torrent
    urls in memory and will yield a torrent only once. If the feed points to
    a torrent url with webserver problems, that url will not be retried.
    urllib2openedurl_to_torrent may be None if there is a webserver problem.
    """
    # Load history from disk
    if not self.urls_already_seen.readed:
        self.urls_already_seen.read()
        self.urls_already_seen.readed = True

    feed_socket = urlOpenTimeout(self.feed_url, timeout=5)
    feed_xml = feed_socket.read()
    feed_socket.close()

    feed_dom = parseString(feed_xml)
    entries = [(title, link)
               for title, link in [(item.getElementsByTagName("title")[0].childNodes[0].data,
                                    item.getElementsByTagName("link")[0].childNodes[0].data)
                                   for item in feed_dom.getElementsByTagName("item")]
               if link.endswith(".torrent") and not self.urls_already_seen.contains(link)]

    # Vuze feeds contain <entry> tags instead of <item> tags, which include
    # a <content> tag carrying the link to the torrent file as an attribute.
    # Support them specially.
    for item in feed_dom.getElementsByTagName("entry"):
        title = item.getElementsByTagName("title")[0].childNodes[0].data
        k = item.getElementsByTagName("content").length
        for i in range(k):
            child = item.getElementsByTagName("content").item(i)
            if child.hasAttribute("src"):
                link = child.getAttribute("src")
                if not self.urls_already_seen.contains(link):
                    entries.append((title, link))

    if DEBUG:
        print >>sys.stderr, "subscrip: Parse of RSS returned", len(entries), "previously unseen torrents"

    for title, link in entries:
        try:
            self.urls_already_seen.add(link)
            if DEBUG:
                print >>sys.stderr, "subscrip: Opening", link
            html_or_tor = urlOpenTimeout(link, timeout=5)
            found_torrent = False
            tor_type = html_or_tor.headers.gettype()

            if self.isTorrentType(tor_type):
                torrent = html_or_tor
                found_torrent = True
                if DEBUG:
                    print >>sys.stderr, "subscrip: Yielding", link
                yield title, torrent
            elif False:  # 'html' in tor_type:
                html = html_or_tor.read()
                hrefs = [match.group(1) for match in self.href_re.finditer(html)]
                urls = []
                for url in hrefs:
                    if not self.urls_already_seen.contains(url):
                        self.urls_already_seen.add(url)
                        urls.append(urlparse.urljoin(link, url))
                for url in urls:
                    try:
                        if DEBUG:
                            print >>sys.stderr, "subscrip: Opening", url
                        torrent = urlOpenTimeout(url)
                        url_type = torrent.headers.gettype()
                        if self.isTorrentType(url_type):
                            found_torrent = True
                            if DEBUG:
                                print >>sys.stderr, "subscrip: Yielding", url
                            yield title, torrent
                            break
                        else:
                            # not a torrent after all, just some html link
                            pass
                    except:
                        # url didn't open
                        pass

            if not found_torrent:
                yield title, None
        except:
            traceback.print_exc()
            yield title, None
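# refresh() is a generator, so a caller drains it and treats a None stream
# as a webserver problem (per the docstring); a consumption sketch:
def poll_feed(reader):
    for title, torrent_stream in reader.refresh():
        if torrent_stream is None:
            continue  # webserver problem; this url will not be retried
        bdata = torrent_stream.read()
        torrent_stream.close()
        # hand the raw bencoded data to e.g. TorrentDef._create(bdecode(bdata))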
def refresh(self):
    """Returns a generator for a list of (title, urllib2openedurl_to_torrent)
    pairs for this feed. TorrentFeedReader instances keep a list of torrent
    urls in memory and will yield a torrent only once. If the feed points to
    a torrent url with webserver problems, that url will not be retried.
    urllib2openedurl_to_torrent may be None if there is a webserver problem.
    """
    # Load history from disk
    if not self.urls_already_seen.readed:
        self.urls_already_seen.read()
        self.urls_already_seen.readed = True

    while True:
        try:
            feed_socket = urlOpenTimeout(self.feed_url, timeout=20)
            feed_xml = feed_socket.read()
            feed_socket.close()
            break
        except:
            yield None, None
            return

    # 14/07/08 boudewijn: some special characters and html code raise a
    # parser exception. We filter out these character sequences using a
    # regular expression in the _filter_xml function.
    dom = parseString(self._filter_xml(feed_xml))
    entries = []

    # The following XML will result in three links with the same title.
    #
    # <item>
    #     <title>The title</title>
    #     <link>http:/frayja.com/torrent/1</link>
    #     <foobar src="frayja.com/torrent/2">Unused title</foobar>
    #     <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
    # </item>
    for item in dom.getElementsByTagName("item"):  # + dom.getElementsByTagName("entry"):
        title = None
        links = []
        child = item.firstChild
        while child:
            if child.nodeType == 1:  # ELEMENT_NODE (according to the DOM standard)
                if child.nodeName == "title" and child.firstChild:
                    title = child.firstChild.data
                if child.nodeName == "link" and child.firstChild:
                    links.append(child.firstChild.data)
                if child.hasAttribute("src"):
                    links.append(child.getAttribute("src"))
                if child.hasAttribute("url"):
                    links.append(child.getAttribute("url"))
            child = child.nextSibling

        if title and links:
            entries.extend([(title, link) for link in links
                            if not self.urls_already_seen.contains(link)])

    if DEBUG:
        print >>sys.stderr, "subscrip: Parse of RSS returned", len(entries), "previously unseen torrents"

    for title, link in entries:
        try:
            self.urls_already_seen.add(link)
            if DEBUG:
                print >>sys.stderr, "subscrip: Opening", title, link
            html_or_tor = urlOpenTimeout(link, timeout=20)
            found_torrent = False
            tor_type = html_or_tor.headers.gettype()

            if self.isTorrentType(tor_type):
                torrent = html_or_tor
                found_torrent = True
                if DEBUG:
                    print >>sys.stderr, "subscrip: torrent1: Yielding", link
                yield title, torrent
            elif False:  # 'html' in tor_type:
                html = html_or_tor.read()
                hrefs = [match.group(1) for match in self.href_re.finditer(html)]
                urls = []
                for url in hrefs:
                    if not self.urls_already_seen.contains(url):
                        self.urls_already_seen.add(url)
                        urls.append(urlparse.urljoin(link, url))
                for url in urls:
                    try:
                        if DEBUG:
                            print >>sys.stderr, "subscrip: torrent2: Opening", url
                        torrent = urlOpenTimeout(url)
                        url_type = torrent.headers.gettype()
                        if self.isTorrentType(url_type):
                            found_torrent = True
                            if DEBUG:
                                print >>sys.stderr, "subscrip: torrent2: Yielding", url
                            yield title, torrent
                            break
                        else:
                            # not a torrent after all, just some html link
                            if DEBUG:
                                print >>sys.stderr, "subscrip:%s not a torrent" % url
                    except:
                        # url didn't open
                        if DEBUG:
                            print >>sys.stderr, "subscrip:%s did not open" % url

            if not found_torrent:
                yield title, None
        except GeneratorExit:
            if DEBUG:
                print >>sys.stderr, "subscrip:GENERATOREXIT"
            # the generator is destroyed. we accept this by returning
            return
        except Exception, e:
            print >>sys.stderr, "rss_client:", e
            yield title, None
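# _filter_xml is referenced above but not shown. A hypothetical minimal
# version consistent with the comment (strip character sequences that the
# XML parser rejects, i.e. control characters illegal in XML 1.0); the
# pattern the original actually uses is not known:
import re
_illegal_xml_re = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f]')

def _filter_xml_sketch(feed_xml):
    return _illegal_xml_re.sub('', feed_xml)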