def load_from_url(url):
    """
    If the URL starts with 'http:' load a BT .torrent or Tribler .tstream
    file from the URL and convert it into a TorrentDef. If the URL starts
    with our URL scheme, we convert the URL to a URL-compatible TorrentDef.
    @param url URL
    @return TorrentDef.
    """
    # Class method, no locking required
    if not url.startswith(P2PURL_SCHEME):
        # Ordinary http(s) URL: download the .torrent/.tstream and parse it.
        stream = urlOpenTimeout(url)
        return TorrentDef._read(stream)
    # P2P URL: the URL itself encodes the metainfo.
    (metainfo, swarmid) = makeurl.p2purl2metainfo(url)
    # Metainfo created from URL, so create URL compatible TorrentDef.
    metainfo['info']['url-compat'] = 1
    # For testing EXISTING LIVE: ENABLE, for old EXISTING MERKLE: DISABLE
    #metainfo['info']['name.utf-8'] = metainfo['info']['name']
    return TorrentDef._create(metainfo)
def load_from_url(url):
    """
    Build a TorrentDef from *url*: for 'http:' URLs the .torrent/.tstream
    file is downloaded and parsed; for URLs in our own P2PURL scheme the
    metainfo is derived from the URL itself and a URL-compatible
    TorrentDef is created.
    @param url URL
    @return TorrentDef.
    """
    # Class method, no locking required
    if url.startswith(P2PURL_SCHEME):
        metainfo, swarmid = makeurl.p2purl2metainfo(url)
        # Metainfo created from URL, so create URL compatible TorrentDef.
        metainfo['info']['url-compat'] = 1
        # For testing EXISTING LIVE: ENABLE, for old EXISTING MERKLE: DISABLE
        #metainfo['info']['name.utf-8'] = metainfo['info']['name']
        return TorrentDef._create(metainfo)
    return TorrentDef._read(urlOpenTimeout(url))
def reopen(self):
    """Re-establish the HTTP stream for self.url, retrying until success.

    Blocks the calling thread. Each failed attempt prints its traceback
    and waits a fixed 5 seconds before retrying.
    """
    while True:
        try:
            self.stream = urlOpenTimeout(self.url)  # 30 sec timeout
            break
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit can still break out of this infinite retry loop.
            print_exc()
            time.sleep(5.0)  # No exp. backoff, get back ASAP
def parse(self):
    """Fetch the Atom feed at self.feedurl and index its entries by title.

    Side effects: resets self.title2entrymap, stores the parsed tree in
    self.tree, and logs progress to stderr.
    """
    self.title2entrymap = {}
    print >>sys.stderr,"feedp: Parsing",self.feedurl
    feed = urlOpenTimeout(self.feedurl,10)
    self.tree = etree.parse(feed)
    # Map each entry's <title> text to the entry element itself.
    for atom_entry in self.tree.findall('{http://www.w3.org/2005/Atom}entry'):
        title_el = atom_entry.find('{http://www.w3.org/2005/Atom}title')
        self.title2entrymap[title_el.text] = atom_entry
def parse(self):
    """Parse the Atom feed behind self.feedurl into a title -> entry map.

    Rebuilds self.title2entrymap and caches the ElementTree in self.tree.
    """
    self.title2entrymap = {}
    print >> sys.stderr, "feedp: Parsing", self.feedurl
    self.tree = etree.parse(urlOpenTimeout(self.feedurl, 10))
    entry_list = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
    for entry in entry_list:
        # Later entries with a duplicate title overwrite earlier ones.
        self.title2entrymap[entry.find('{http://www.w3.org/2005/Atom}title').text] = entry
def parse(self):
    """Download the Atom meta-feed at self.metafeedurl and collect the
    URLs of the child feeds it links to into self.feedurls.

    A child feed is an <entry> whose <link> has
    type="application/atom+xml"; its href attribute is recorded.
    """
    self.feedurls = []
    stream = urlOpenTimeout(self.metafeedurl,10)
    self.tree = etree.parse(stream)
    entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
    for entry in entries:
        linkelement = entry.find('{http://www.w3.org/2005/Atom}link')
        if linkelement is not None:
            # The 'type' attribute is optional on an Atom <link>;
            # Element.get() returns None for a missing attribute instead
            # of raising KeyError like attrib['type'] would.
            if linkelement.get('type') == 'application/atom+xml':
                # Got feed
                feedurl = linkelement.attrib['href']
                self.feedurls.append(feedurl)
def parse(self):
    """Parse the Atom meta-feed at self.metafeedurl and record every
    linked sub-feed URL (links typed application/atom+xml) in
    self.feedurls.
    """
    self.feedurls = []
    stream = urlOpenTimeout(self.metafeedurl, 10)
    self.tree = etree.parse(stream)
    for entry in self.tree.findall('{http://www.w3.org/2005/Atom}entry'):
        linkelement = entry.find('{http://www.w3.org/2005/Atom}link')
        if linkelement is None:
            continue
        # 'type' is optional on an Atom <link>; use Element.get() so a
        # link without it is skipped rather than raising KeyError.
        if linkelement.get('type') == 'application/atom+xml':
            # Got feed
            self.feedurls.append(linkelement.attrib['href'])
def getStatus(url, info_hash):
    """Scrape the BT tracker at *url* for the torrent *info_hash*.

    @param url tracker scrape URL
    @param info_hash binary infohash used as key in the scrape reply
    @return (seeder, leecher), both clamped to >= 0 on success.
        (-1, -1) = unknown (network error), (-2, -2) = dead,
        (-3, -3) = tracker requested a minimum request interval.
    """
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()
    except IOError:
        # print "IOError"
        return (-1, -1)  # unknown
    except AttributeError:
        # print "AttributeError"
        return (-2, -2)  # dead
    try:
        response_dict = bdecode(response)
    except Exception:
        # Narrowed from a bare except: a malformed reply counts as dead.
        # print "DeCode Error " + response
        return (-2, -2)  # dead
    try:
        status = response_dict["files"][info_hash]
        # Some trackers report negative counts; clamp to zero.
        seeder = max(0, status["complete"])
        leecher = max(0, status["incomplete"])
    except KeyError:
        # print "KeyError " + info_hash + str(response_dict)
        try:
            # "in" replaces dict.has_key(), which is deprecated.
            if "flags" in response_dict:
                # may be interval problem
                if "min_request_interval" in response_dict["flags"]:
                    # print "interval problem"
                    return (-3, -3)
        except Exception:
            pass
        # print "KeyError " + info_hash + str(response_dict)
        return (-2, -2)  # dead
    return (seeder, leecher)
def getStatus(url, info_hash):
    """Query a tracker scrape URL for seeder/leecher counts of *info_hash*.

    @return (seeder, leecher) with negative counts clamped to 0.
        Error codes: (-1, -1) unknown/network error, (-2, -2) dead
        torrent or garbled reply, (-3, -3) tracker imposes a
        min_request_interval back-off.
    """
    try:
        resp = timeouturlopen.urlOpenTimeout(url,timeout=HTTP_TIMEOUT)
        response = resp.read()
    except IOError:
        # print "IOError"
        return (-1, -1)  # unknown
    except AttributeError:
        # print "AttributeError"
        return (-2, -2)  # dead
    try:
        response_dict = bdecode(response)
    except Exception:
        # Narrowed from a bare except: undecodable replies mean dead.
        # print "DeCode Error " + response
        return (-2, -2)  # dead
    try:
        status = response_dict["files"][info_hash]
        # Clamp nonsensical negative tracker counts to zero.
        seeder = max(0, status["complete"])
        leecher = max(0, status["incomplete"])
    except KeyError:
        # print "KeyError " + info_hash + str(response_dict)
        try:
            # dict.has_key() is deprecated (gone in Python 3); use "in".
            if "flags" in response_dict:
                # may be interval problem
                if "min_request_interval" in response_dict["flags"]:
                    # print "interval problem"
                    return (-3, -3)
        except Exception:
            pass
        # print "KeyError " + info_hash + str(response_dict)
        return (-2, -2)  # dead
    return (seeder, leecher)
def refresh(self):
    """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
    pairs for this feed. TorrentFeedReader instances keep a list of torrent
    urls in memory and will yield a torrent only once. If the feed points
    to a torrent url with webserver problems, that url will not be retried.
    urllib2openedurl_to_torrent may be None if there is a webserver problem.
    """
    # Load history from disk
    if not self.urls_already_seen.readed:
        self.urls_already_seen.read()
        self.urls_already_seen.readed = True
    # Download the feed, retrying forever; every failed attempt yields a
    # (None, None) sentinel so the consumer can observe the problem.
    while True:
        try:
            feed_socket = urlOpenTimeout(self.feed_url,timeout=20)
            feed_xml = feed_socket.read()
            feed_socket.close()
            break
        except:
            yield None, None

    # 14/07/08 boudewijn: some special characters and html code is
    # raises a parser exception. We filter out these character
    # sequenses using a regular expression in the filter_xml
    # function
    dom = parseString(self._filter_xml(feed_xml))
    entries = []

    # The following XML will result in three links with the same title.
    #
    # <item>
    #     <title>The title</title>
    #     <link>http:/frayja.com/torrent/1</link>
    #     <foobar src="frayja.com/torrent/2">Unused title</foobar>
    #     <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
    # </items>
    for item in dom.getElementsByTagName("item"):  #+ dom.getElementsByTagName("entry"):
        title = None
        links = []
        # Walk the item's direct children, collecting the title text and
        # every candidate link: <link> text plus any src=/url= attribute.
        child = item.firstChild
        while child:
            if child.nodeType == 1:  # ELEMENT_NODE (according to the DOM standard)
                if child.nodeName == "title" and child.firstChild:
                    title = child.firstChild.data
                if child.nodeName == "link" and child.firstChild:
                    links.append(child.firstChild.data)
                if child.hasAttribute("src"):
                    links.append(child.getAttribute("src"))
                if child.hasAttribute("url"):
                    links.append(child.getAttribute("url"))
            child = child.nextSibling
        if title and links:
            entries.extend([(title, link) for link in links])
    if DEBUG:
        print >>sys.stderr,time.asctime(),'-', "subscrip: Parse of RSS returned",len(entries),"previously unseen torrents"
    for title,link in entries:
        # print title,link
        try:
            # Mark the link seen before opening it, so a failing URL is
            # not retried on the next refresh (see docstring).
            self.urls_already_seen.add(link)
            if DEBUG:
                print >>sys.stderr,time.asctime(),'-', "subscrip: Opening",title,link
            html_or_tor = urlOpenTimeout(link,timeout=20)
            found_torrent = False
            tor_type = html_or_tor.headers.gettype()
            if self.isTorrentType(tor_type):
                torrent = html_or_tor
                found_torrent = True
                if DEBUG:
                    print >>sys.stderr,time.asctime(),'-', "subscrip: torrent1: Yielding",link
                yield title,torrent
            elif False:  # 'html' in tor_type:
                # NOTE(review): dead branch (``elif False``) — the HTML
                # link-scraping fallback below is deliberately disabled.
                html = html_or_tor.read()
                hrefs = [match.group(1) for match in self.href_re.finditer(html)]
                urls = []
                for url in hrefs:
                    if not self.urls_already_seen.contains(url):
                        self.urls_already_seen.add(url)
                        urls.append(urlparse.urljoin(link,url))
                for url in urls:
                    #print url
                    try:
                        if DEBUG:
                            print >>sys.stderr,time.asctime(),'-', "subscrip: torrent2: Opening",url
                        torrent = urlOpenTimeout(url)
                        url_type = torrent.headers.gettype()
                        #print url_type
                        if self.isTorrentType(url_type):
                            #print "torrent found:",url
                            found_torrent = True
                            if DEBUG:
                                print >>sys.stderr,time.asctime(),'-', "subscrip: torrent2: Yielding",url
                            yield title,torrent
                            break
                        else:
                            #its not a torrent after all, but just some html link
                            if DEBUG:
                                print >>sys.stderr,time.asctime(),'-', "subscrip:%s not a torrent" % url
                    except:
                        #url didn't open
                        if DEBUG:
                            print >>sys.stderr,time.asctime(),'-', "subscrip:%s did not open" % url
            if not found_torrent:
                yield title,None
        except GeneratorExit:
            if DEBUG:
                print >>sys.stderr,time.asctime(),'-', "subscrip:GENERATOREXIT"
            # the generator is destroyed. we accept this by returning
            return
        except Exception, e:
            print >> sys.stderr, time.asctime(),'-', "rss_client:", e
            yield title,None
def refresh(self):
    """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
    pairs for this feed. TorrentFeedReader instances keep a list of torrent
    urls in memory and will yield a torrent only once. If the feed points
    to a torrent url with webserver problems, that url will not be retried.
    urllib2openedurl_to_torrent may be None if there is a webserver problem.
    """
    # Load history from disk
    if not self.urls_already_seen.readed:
        self.urls_already_seen.read()
        self.urls_already_seen.readed = True
    # NOTE(review): unlike the retrying refresh variant, a failure to
    # download the feed here propagates to the caller.
    feed_socket = urlOpenTimeout(self.feed_url, timeout=20)
    feed_xml = feed_socket.read()
    feed_socket.close()
    # 14/07/08 boudewijn: some special characters and html code is
    # raises a parser exception. We filter out these character
    # sequenses using a regular expression in the filter_xml
    # function
    dom = parseString(self._filter_xml(feed_xml))
    entries = []

    # The following XML will result in three links with the same title.
    #
    # <item>
    #     <title>The title</title>
    #     <link>http:/frayja.com/torrent/1</link>
    #     <foobar src="frayja.com/torrent/2">Unused title</foobar>
    #     <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
    # </items>
    for item in dom.getElementsByTagName("item"):  #+ dom.getElementsByTagName("entry"):
        title = None
        links = []
        # Collect the title text and every candidate link from the item's
        # direct children: <link> text plus any src=/url= attribute.
        child = item.firstChild
        while child:
            if child.nodeType == 1:  # ELEMENT_NODE (according to the DOM standard)
                if child.nodeName == "title" and child.firstChild:
                    title = child.firstChild.data
                if child.nodeName == "link" and child.firstChild:
                    links.append(child.firstChild.data)
                if child.hasAttribute("src"):
                    links.append(child.getAttribute("src"))
                if child.hasAttribute("url"):
                    links.append(child.getAttribute("url"))
            child = child.nextSibling
        if title and links:
            entries.extend([(title, link) for link in links])
    if DEBUG:
        print >> sys.stderr, "subscrip: Parse of RSS returned", len(entries), "previously unseen torrents"
    for title, link in entries:
        # print title,link
        try:
            # Record the link before opening it so a broken URL is not
            # retried on the next refresh (see docstring).
            self.urls_already_seen.add(link)
            if DEBUG:
                print >> sys.stderr, "subscrip: Opening", title, link
            html_or_tor = urlOpenTimeout(link, timeout=20)
            found_torrent = False
            tor_type = html_or_tor.headers.gettype()
            if self.isTorrentType(tor_type):
                torrent = html_or_tor
                found_torrent = True
                if DEBUG:
                    print >> sys.stderr, "subscrip: torrent1: Yielding", link
                yield title, torrent
            elif False:  # 'html' in tor_type:
                # NOTE(review): dead branch (``elif False``) — the HTML
                # link-scraping fallback below is deliberately disabled.
                html = html_or_tor.read()
                hrefs = [
                    match.group(1)
                    for match in self.href_re.finditer(html)
                ]
                urls = []
                for url in hrefs:
                    if not self.urls_already_seen.contains(url):
                        self.urls_already_seen.add(url)
                        urls.append(urlparse.urljoin(link, url))
                for url in urls:
                    #print url
                    try:
                        if DEBUG:
                            print >> sys.stderr, "subscrip: torrent2: Opening", url
                        torrent = urlOpenTimeout(url)
                        url_type = torrent.headers.gettype()
                        #print url_type
                        if self.isTorrentType(url_type):
                            #print "torrent found:",url
                            found_torrent = True
                            if DEBUG:
                                print >> sys.stderr, "subscrip: torrent2: Yielding", url
                            yield title, torrent
                            break
                        else:
                            #its not a torrent after all, but just some html link
                            if DEBUG:
                                print >> sys.stderr, "%s not a torrent" % url
                    except:
                        #url didn't open
                        if DEBUG:
                            print >> sys.stderr, "%s did not open" % url
            if not found_torrent:
                yield title, None
        except GeneratorExit:
            if DEBUG:
                print >> sys.stderr, "GENERATOREXIT"
            # the generator is destroyed. we accept this by returning
            return
        except:
            traceback.print_exc()
            yield title, None