def __init__(self, pid, grabber, data):
    """Populate this element from one XML node of a page listing.

    pid     -- identifier stored on the instance as ``self.pid``.
    grabber -- download helper object, stored as ``self.grabber``.
    data    -- XML node exposing ``findtext``; the child tags read here are
               titolo, descrizione, dominio, datapubblicazione, h264, m3u8,
               localid, durata and web.
    """
    super(Elem, self).__init__()

    self.pid = pid
    self.grabber = grabber

    self.title = data.findtext("titolo")
    self.description = data.findtext("descrizione")
    self.channel = data.findtext("dominio")

    # The publication date may use "-" as separator; normalise to "/"
    # so a single strptime format covers both spellings.
    # NOTE(review): if the tag is absent findtext presumably returns None
    # and .replace fails -- confirm the feed always carries it.
    published = data.findtext("datapubblicazione").replace("-", "/")
    self.datetime = datetime.datetime.strptime(published, "%d/%m/%Y")

    # extra experimental data
    Utils.addH264Url(self.h264, 0, data.findtext("h264"))
    self.ts = data.findtext("m3u8")
    self.id = data.findtext("localid")
    self.length = data.findtext("durata")

    # Prefer the explicit web path; otherwise derive one from the local id.
    web = data.findtext("web") or RAIUrls.getWebFromID(self.id)
    self.url = RAIUrls.base + web

    self.filename = Utils.makeFilename(self.title)
    self.canFollow = True
def download(db, grabber, term, downType):
    """Download the search results for ``term`` and hand them to ``process``.

    The result document (at most 100 entries are requested) is stored as
    ``<term>.json`` inside ``Config.searchFolder``.

    db       -- database object forwarded to ``process``.
    grabber  -- download helper forwarded to ``Utils.download`` and ``process``.
    term     -- search string; also used as the local file's base name.
    downType -- download mode forwarded to ``Utils.download``.
    """
    searchUrl = RAIUrls.getSearchUrl(term, 100)
    cachePath = os.path.join(Config.searchFolder, term + ".json")

    results = Utils.download(
        grabber, None, searchUrl, cachePath, downType, "utf-8")
    process(grabber, results, db)
def __init__(self, grabber, url, downType, pid):
    """Build an on-demand item by downloading and scraping its HTML page.

    grabber  -- download helper, stored as ``self.grabber``.
    url      -- full URL, or a scheme-less reference that is expanded with
                ``RAIUrls.getItemUrl``.
    downType -- download mode forwarded to ``Utils.download``.
    pid      -- identifier stored on the instance as ``self.pid``.

    When the page declares a type other than "Video", ``self.url`` and
    ``self.filename`` are reset to None and ``self.mms`` stays None.
    """
    super(Demand, self).__init__()

    self.grabber = grabber

    # A value without a URL scheme is not a URL yet: expand it first.
    if not urllib.parse.urlparse(url).scheme:
        url = RAIUrls.getItemUrl(url)

    self.url = url
    self.pid = pid

    cachePath = os.path.join(Config.itemFolder, Utils.httpFilename(self.url))
    page = Utils.download(grabber, None, self.url, cachePath, downType, "utf-8")

    scraper = VideoHTMLParser()
    scraper.feed(page.read())
    self.values = scraper.values
    found = self.values

    self.channel = "item"
    self.title = found.title
    self.ts = found.videoUrlM3U8
    Utils.addH264Url(self.h264, 0, found.videoUrlH264)

    if found.date:
        self.datetime = datetime.datetime.strptime(found.date, "%d/%m/%Y")

    self.mms = None

    if found.type and found.type != "Video":
        # this is a case of a Photogallery
        self.url = None
        self.filename = None
        return

    # Fall back to the path scraped from the <param> tag.
    if not found.videoUrl:
        found.videoUrl = found.videoPath

    # sometimes we get .mp4 which does not work
    found.videoUrl = found.videoUrl.replace(
        "relinkerServlet.mp4", "relinkerServlet.htm")

    # make a nice filename
    self.filename = Utils.makeFilename(self.title)

    self.mms = found.videoUrl
def handle_starttag(self, tag, attrs):
    """Collect video metadata from ``<meta>`` and ``<param>`` start tags.

    tag   -- tag name as reported by the HTML parser.
    attrs -- list of ``(name, value)`` pairs for the tag.

    For ``<meta>`` tags each known key is looked up through
    ``self.extract(attrs, key)`` (defined elsewhere; presumably returns the
    matching content or a falsy value -- confirm) and every truthy result
    is stored on ``self.values``.

    For ``<param>`` tags whose first attribute is ``value`` and whose text
    starts with ``videoPath``, the text between the first "=" and the first
    "," is stored as ``self.values.videoPath``. When there is no comma the
    rest of the string is used; previously the last character was dropped
    because ``find`` returned -1 and was used as a slice end.
    """
    if tag == "meta":
        # (meta key, attribute on self.values), applied in this order.
        plainFields = (
            ("videourl", "videoUrl"),
            ("videourl_mp4", "videoUrlMP4"),
            ("videourl_h264", "videoUrlH264"),
            ("videourl_m3u8", "videoUrlM3U8"),
            ("title", "title"),
            ("programmaTV", "program"),
            ("description", "description"),
            ("tipo", "type"),
            ("itemDate", "date"),
        )
        for key, field in plainFields:
            val = self.extract(attrs, key)
            if val:
                setattr(self.values, field, val)

        # The programme page is stored as a full URL, not as the raw id.
        val = self.extract(attrs, "idPageProgramma")
        if val:
            self.values.page = RAIUrls.base + RAIUrls.getWebFromID(val)

    elif tag == "param":
        if not attrs or attrs[0][0] != "value":
            return

        path = attrs[0][1]
        # A valueless attribute is reported as None by html.parser.
        if not path or not path.startswith("videoPath"):
            return

        firstEqual = path.find("=")
        if firstEqual == -1:
            # No "=": there is no value to extract.
            return

        firstComma = path.find(",")
        if firstComma == -1:
            # No trailing options: the value runs to the end of the string.
            firstComma = len(path)

        self.values.videoPath = path[firstEqual + 1:firstComma]
def handle_starttag(self, tag, attrs):
    """Record video metadata found in ``<meta>`` and ``<param>`` tags.

    tag   -- tag name as reported by the HTML parser.
    attrs -- list of ``(name, value)`` pairs for the tag.

    ``<meta>``: every known key is queried with ``self.extract(attrs, key)``
    (defined elsewhere; presumably yields the associated content or a falsy
    value -- confirm) and truthy results are written to ``self.values``.

    ``<param>``: only considered when its first attribute is ``value`` and
    the text starts with ``videoPath``. The part after the first "=" up to
    the first "," becomes ``self.values.videoPath``. A missing comma means
    the value extends to the end of the text; the earlier code used the -1
    returned by ``find`` as slice end and so lost the final character.
    """
    if tag == "meta":
        # Meta key -> attribute of self.values, processed in this order.
        metaToField = (
            ("videourl", "videoUrl"),
            ("videourl_mp4", "videoUrlMP4"),
            ("videourl_h264", "videoUrlH264"),
            ("videourl_m3u8", "videoUrlM3U8"),
            ("title", "title"),
            ("programmaTV", "program"),
            ("description", "description"),
            ("tipo", "type"),
            ("itemDate", "date"),
        )
        for metaKey, fieldName in metaToField:
            found = self.extract(attrs, metaKey)
            if found:
                setattr(self.values, fieldName, found)

        # The programme id is converted into a full page URL.
        pageId = self.extract(attrs, "idPageProgramma")
        if pageId:
            self.values.page = RAIUrls.base + RAIUrls.getWebFromID(pageId)
        return

    if tag != "param":
        return

    if len(attrs) == 0 or attrs[0][0] != "value":
        return

    text = attrs[0][1]
    # html.parser passes None for attributes written without a value.
    if text is None or not text.startswith("videoPath"):
        return

    equalPos = text.find("=")
    if equalPos == -1:
        # Nothing follows the name, so there is no path to store.
        return

    commaPos = text.find(",")
    end = commaPos if commaPos != -1 else len(text)
    self.values.videoPath = text[equalPos + 1:end]
def processPage(grabber, progress, folder, f, db, downType):
    """Download and process every video block listed in a page's menu.

    The XML read from ``f`` is expected to have a ``menu`` child under its
    root. For each ``item`` child of the menu with ``id="video"``, the
    ``path`` attribute of its ``src`` child is turned into a URL with
    ``RAIUrls.getJuniorBlock``, downloaded into ``folder`` and, if the
    download yields something truthy, passed to ``processBlock``.

    grabber, progress, downType -- forwarded to ``Utils.download``.
    folder -- directory where downloaded blocks are stored.
    f      -- file object (or path) holding the page XML.
    db     -- database object forwarded to ``processBlock``.

    Pages without a ``menu`` section, and video items without a ``src``
    child or ``path`` attribute, are skipped instead of raising.
    """
    menu = ElementTree.parse(f).getroot().find('menu')
    if menu is None:
        # Compare with None: an Element without children is falsy.
        return

    for e in menu:
        if e.tag != "item" or e.attrib.get("id") != "video":
            continue

        src = e.find("src")
        if src is None:
            continue

        path = src.attrib.get("path")
        if path is None:
            continue

        url = RAIUrls.getJuniorBlock(path)
        name = Utils.httpFilename(url)
        localFilename = os.path.join(folder, name)

        g = Utils.download(grabber, progress, url, localFilename, downType, None, True)
        if g:
            processBlock(grabber, progress, folder, g, db, downType)
def __init__(self, grabber, url, downType, pid):
    """Create an on-demand item from the metadata scraped off its page.

    grabber  -- download helper, kept as ``self.grabber``.
    url      -- item URL; a value without a scheme is first expanded via
                ``RAIUrls.getItemUrl``.
    downType -- download mode handed to ``Utils.download``.
    pid      -- identifier kept as ``self.pid``.

    Items whose declared type is something other than "Video" end up with
    ``url``, ``filename`` and ``mms`` all set to None.
    """
    super(Demand, self).__init__()
    self.grabber = grabber

    hasScheme = bool(urllib.parse.urlparse(url).scheme)
    self.url = url if hasScheme else RAIUrls.getItemUrl(url)
    self.pid = pid

    # The page is stored under the item folder, named after its URL.
    itemFolder = Config.itemFolder
    target = os.path.join(itemFolder, Utils.httpFilename(self.url))
    html = Utils.download(grabber, None, self.url, target, downType, "utf-8")

    htmlParser = VideoHTMLParser()
    htmlParser.feed(html.read())
    self.values = htmlParser.values

    self.channel = "item"
    self.title = self.values.title
    self.ts = self.values.videoUrlM3U8
    Utils.addH264Url(self.h264, 0, self.values.videoUrlH264)

    if self.values.date:
        self.datetime = datetime.datetime.strptime(self.values.date, "%d/%m/%Y")

    self.mms = None

    isVideo = not self.values.type or self.values.type == "Video"
    if isVideo:
        # Use the <param> path when no explicit video URL was found.
        if not self.values.videoUrl:
            self.values.videoUrl = self.values.videoPath

        # sometimes we get .mp4 which does not work
        self.values.videoUrl = self.values.videoUrl.replace("relinkerServlet.mp4", "relinkerServlet.htm")

        # make a nice filename
        self.filename = Utils.makeFilename(self.title)

        self.mms = self.values.videoUrl
    else:
        # this is a case of a Photogallery
        self.url = None
        self.filename = None
def process(grabber, progress, folder, f, db, downType):
    """Walk the top-level XML in ``f`` and process each referenced page.

    Every ``elemento`` child of the root that carries a non-empty
    ``uniqueName`` is resolved to a URL with ``RAIUrls.getJuniorPage``,
    downloaded into ``folder`` and, if the download yields something
    truthy, handed to ``processPage``.

    grabber, progress, downType -- forwarded to ``Utils.download``.
    folder -- directory where downloaded pages are stored.
    f      -- file object (or path) holding the XML index.
    db     -- database object forwarded to ``processPage``.
    """
    for node in ElementTree.parse(f).getroot():
        if node.tag != "elemento":
            continue

        nameNode = node.find("uniqueName")
        if nameNode is None or not nameNode.text:
            # Entries without a usable name cannot be resolved to a page.
            continue

        pageUrl = RAIUrls.getJuniorPage(nameNode.text)
        target = os.path.join(folder, Utils.httpFilename(pageUrl))

        page = Utils.download(grabber, progress, pageUrl, target, downType, None, True)
        if page:
            processPage(grabber, progress, folder, page, db, downType)
def download(db, grabber, url, downType):
    """Download a page's XML data and add each ``content`` entry to ``db``.

    The page name is the file name of ``url`` without its extension; the
    data document is stored as ``<page>.xml`` inside ``Config.pageFolder``.

    db       -- database receiving one ``Elem`` per ``content`` node.
    grabber  -- download helper, also passed on to each ``Elem``.
    url      -- URL of the page whose data should be fetched.
    downType -- download mode forwarded to ``Utils.download``.
    """
    pageName = os.path.splitext(Utils.httpFilename(url))[0]

    dataUrl = RAIUrls.getPageDataUrl(pageName)
    cachePath = os.path.join(Config.pageFolder, pageName + ".xml")
    xmlFile = Utils.download(grabber, None, dataUrl, cachePath, downType, "utf-8")

    # ElementTree does not like unicode, it prefers byte strings
    xmlText = Utils.removeInvalidXMLCharacters(xmlFile.read().strip())
    root = ElementTree.fromstring(xmlText)

    for node in root.findall("content"):
        newPid = Utils.getNewPID(db, None)
        Utils.addToDB(db, Elem(newPid, grabber, node))