def download(db, grabber, downType):
    """Download the news and programs JSON feeds and process each one.

    Both files are stored in ``Config.tf1Folder``.  The news feed is
    downloaded and handed to ``processNews`` before the programs feed is
    requested and handed to ``processPrograms``.
    """
    progress = Utils.getProgress()
    targetDir = Config.tf1Folder

    # news feed
    newsPath = os.path.join(targetDir, "news.json")
    newsFile = Utils.download(
        grabber, progress, newsUrl, newsPath, downType, "utf-8", True)
    processNews(grabber, newsFile, targetDir, progress, downType, db)

    # programs feed
    programsPath = os.path.join(targetDir, "programs.json")
    programsFile = Utils.download(
        grabber, progress, programsUrl, programsPath, downType, "utf-8", True)
    processPrograms(grabber, programsFile, targetDir, progress, downType, db)
def download(db, grabber, term, downType):
    """Download the search result for ``term`` and pass it to ``process``.

    The URL comes from ``RAIUrls.getSearchUrl(term, 100)``; the local copy
    is named ``<term>.json`` inside ``Config.searchFolder``.  No progress
    object is supplied to ``Utils.download``.
    """
    searchUrl = RAIUrls.getSearchUrl(term, 100)
    target = os.path.join(Config.searchFolder, term + ".json")
    result = Utils.download(grabber, None, searchUrl, target, downType, "utf-8")
    process(grabber, result, db)
def download(db, grabber, downType):
    """Download the document at ``RAIUrls.info`` and pass it to ``process``.

    The local copy lives in ``Config.tgFolder`` under the name that
    ``Utils.httpFilename`` derives from the URL.
    """
    progress = Utils.getProgress()
    sourceUrl = RAIUrls.info
    target = os.path.join(Config.tgFolder, Utils.httpFilename(sourceUrl))
    info = Utils.download(
        grabber, progress, sourceUrl, target, downType, "utf-8", True)
    process(grabber, progress, downType, info, db)
def processItem(grabber, progress, downType, title, time, url, db):
    """Download ``url`` into ``Config.tgFolder`` and process it as a set.

    ``processSet`` is only called when ``Utils.download`` returns a truthy
    value; otherwise the item is skipped without further action.
    """
    target = os.path.join(Config.tgFolder, Utils.httpFilename(url))
    content = Utils.download(
        grabber, progress, url, target, downType, "utf-8", True)
    if not content:
        return
    processSet(grabber, title, time, content, db)
def download(db, grabber, downType):
    """Download the document at ``RAIUrls.junior`` and pass it to ``process``.

    The local copy is stored in ``Config.juniorFolder``; the same folder is
    forwarded to ``process`` together with the progress object.
    """
    targetDir = Config.juniorFolder
    target = os.path.join(targetDir, Utils.httpFilename(RAIUrls.junior))
    progress = Utils.getProgress()
    index = Utils.download(
        grabber, progress, RAIUrls.junior, target, downType, None, True)
    process(grabber, progress, targetDir, index, db, downType)
def download(db, grabber, downType):
    """Download the document at ``RAIUrls.onDemand`` and pass it to ``process``.

    The local copy is stored in ``Config.demandFolder``.  The download is
    requested with ``"raw-unicode-escape"`` in the position where the other
    callers in this file pass ``"utf-8"`` or ``None``.
    """
    target = os.path.join(
        Config.demandFolder, Utils.httpFilename(RAIUrls.onDemand))
    progress = Utils.getProgress()
    catalogue = Utils.download(
        grabber, progress, RAIUrls.onDemand, target, downType,
        "raw-unicode-escape", True)
    process(grabber, catalogue, db)
def downloadGroup(grabber, name, groupId, folder, progress, downType, db):
    """Download both data parts (0 and 1) of a group and process each one.

    Part 0 is downloaded and processed before part 1 is requested.  A part
    whose download yields a falsy value is skipped.
    """
    # a group is subject to continuous changes, so the timestamp check
    # is always requested
    checkTimestamp = True

    for part, suffix in ((0, ".0.json"), (1, ".1.json")):
        partUrl = getDataUrl(groupId, part)
        target = os.path.join(folder, str(groupId) + suffix)
        data = Utils.download(
            grabber, progress, partUrl, target, downType, "utf-8",
            checkTimestamp)
        if data:
            processGroup(grabber, data, name, db)
def __init__(self, grabber, url, downType, pid):
    """Download the item page at ``url`` and fill this object from it.

    A ``url`` without a scheme is first converted with
    ``RAIUrls.getItemUrl``.  The page is fed to a ``VideoHTMLParser`` and
    the parsed values populate title, ts, the H264 entry, the optional
    date, and finally the filename and ``mms`` URL.  When the parsed type
    is set and is not ``"Video"``, ``url`` and ``filename`` are set to
    ``None`` and ``mms`` stays ``None``.
    """
    super(Demand, self).__init__()

    self.grabber = grabber

    # relative references carry no scheme and must be expanded first
    if not urllib.parse.urlparse(url).scheme:
        url = RAIUrls.getItemUrl(url)
    self.url = url
    self.pid = pid

    target = os.path.join(Config.itemFolder, Utils.httpFilename(self.url))
    page = Utils.download(grabber, None, self.url, target, downType, "utf-8")

    parser = VideoHTMLParser()
    parser.feed(page.read())
    values = parser.values
    self.values = values

    self.channel = "item"
    self.title = values.title
    self.ts = values.videoUrlM3U8
    Utils.addH264Url(self.h264, 0, values.videoUrlH264)

    if values.date:
        self.datetime = datetime.datetime.strptime(values.date, "%d/%m/%Y")

    self.mms = None

    isVideo = (not values.type) or values.type == "Video"
    if not isVideo:
        # this is a case of a Photogallery
        self.url = None
        self.filename = None
        return

    if not values.videoUrl:
        values.videoUrl = values.videoPath

    # sometimes we get .mp4 which does not work
    values.videoUrl = values.videoUrl.replace(
        "relinkerServlet.mp4", "relinkerServlet.htm")

    # make a nice filename
    self.filename = Utils.makeFilename(self.title)
    self.mms = values.videoUrl
def download(db, grabber, downType):
    """Download the catalogue of every entry in ``channels`` and process it.

    Each catalogue is stored in ``Config.m6Folder`` under the URL-derived
    filename with ``"." + channel`` appended.  Channels whose download
    yields a falsy value are skipped.
    """
    progress = Utils.getProgress()
    for channel in channels:
        catalogueUrl = getCatalogueUrl(channel)
        localName = os.path.join(
            Config.m6Folder, Utils.httpFilename(catalogueUrl) + "." + channel)
        catalogue = Utils.download(
            grabber, progress, catalogueUrl, localName, downType, "utf-8",
            True)
        if not catalogue:
            continue
        process(grabber, downType, catalogue, channel, db)
def processPage(grabber, progress, folder, f, db, downType):
    """Download and process every video block referenced by a page.

    ``f`` is parsed as XML.  Under the root's ``menu`` element, every
    ``item`` child whose ``id`` attribute is ``"video"`` contributes the
    ``path`` attribute of its ``src`` child; that path is converted to a
    URL with ``RAIUrls.getJuniorBlock``, downloaded into ``folder`` and,
    when the download is truthy, passed to ``processBlock``.
    """
    menu = ElementTree.parse(f).getroot().find('menu')
    for entry in menu:
        # only <item id="video"> entries are of interest
        if entry.tag != "item" or entry.attrib.get("id") != "video":
            continue

        blockPath = entry.find("src").attrib.get("path")
        blockUrl = RAIUrls.getJuniorBlock(blockPath)
        target = os.path.join(folder, Utils.httpFilename(blockUrl))
        block = Utils.download(
            grabber, progress, blockUrl, target, downType, None, True)
        if block:
            processBlock(grabber, progress, folder, block, db, downType)
def __init__(self, grabber, url, downType, pid):
    """Build the item from the page found at ``url``.

    If ``url`` has no scheme it is expanded via ``RAIUrls.getItemUrl``.
    The page is downloaded into ``Config.itemFolder``, parsed with
    ``VideoHTMLParser`` and the resulting values are copied onto the
    instance.  For a parsed type that is set but different from
    ``"Video"`` the constructor clears ``url`` and ``filename`` and
    returns early, leaving ``mms`` as ``None``.
    """
    super(Demand, self).__init__()

    self.grabber = grabber

    hasScheme = bool(urllib.parse.urlparse(url).scheme)
    if not hasScheme:
        url = RAIUrls.getItemUrl(url)
    self.url = url
    self.pid = pid

    localPath = os.path.join(Config.itemFolder, Utils.httpFilename(self.url))
    html = Utils.download(
        grabber, None, self.url, localPath, downType, "utf-8")

    htmlParser = VideoHTMLParser()
    htmlParser.feed(html.read())
    parsed = htmlParser.values
    self.values = parsed

    self.channel = "item"
    self.title = parsed.title
    self.ts = parsed.videoUrlM3U8
    Utils.addH264Url(self.h264, 0, parsed.videoUrlH264)

    if parsed.date:
        self.datetime = datetime.datetime.strptime(parsed.date, "%d/%m/%Y")

    self.mms = None

    if parsed.type and parsed.type != "Video":
        # this is a case of a Photogallery
        self.url = None
        self.filename = None
        return

    # fall back to the video path when no video URL was parsed
    if not parsed.videoUrl:
        parsed.videoUrl = parsed.videoPath

    # sometimes we get .mp4 which does not work
    fixedUrl = parsed.videoUrl.replace(
        "relinkerServlet.mp4", "relinkerServlet.htm")
    parsed.videoUrl = fixedUrl

    # make a nice filename
    self.filename = Utils.makeFilename(self.title)
    self.mms = parsed.videoUrl
def downloadItems(grabber, url, which, conf, folder, progress, downType, db):
    """Download ``url`` into ``folder`` and dispatch on ``which``.

    ``FULL_VIDEO`` and ``PROGRAM_VIDEO`` both go to ``processFullVideo``
    (tagged ``"episodi_interi"`` and ``"brand"`` respectively);
    ``PROGRAM_LIST`` goes to ``processProgramList`` and ``PROGRAM`` to
    ``processProgram``.  Nothing happens when the download is falsy or
    ``which`` matches none of these constants.
    """
    target = os.path.join(folder, Utils.httpFilename(url))
    listing = Utils.download(
        grabber, progress, url, target, downType, "utf-8", True)
    if not listing:
        return

    # trailing arguments common to every processor
    shared = (conf, folder, progress, downType, db)

    if which == FULL_VIDEO:
        processFullVideo(grabber, listing, "episodi_interi", *shared)
    elif which == PROGRAM_LIST:
        processProgramList(grabber, listing, *shared)
    elif which == PROGRAM:
        processProgram(grabber, listing, *shared)
    elif which == PROGRAM_VIDEO:
        processFullVideo(grabber, listing, "brand", *shared)
def getTS(self):
    """Return ``self.ts``, resolving it from ``self.url`` when still unset.

    When ``self.ts`` is falsy, the document at ``self.url`` is downloaded
    into ``Config.m6Folder`` and parsed as XML.  Every ``assetItem`` under
    the root's ``asset`` element is converted with ``getTSUrl``; each
    result overwrites ``self.ts``, so the last item is the one kept.  If
    the download is falsy, ``self.ts`` is returned unchanged.
    """
    if self.ts:
        return self.ts

    localName = os.path.join(Config.m6Folder, Utils.httpFilename(self.url))
    progress = Utils.getProgress()
    doc = Utils.download(
        self.grabber, progress, self.url, localName, self.downType,
        "utf-8", True)
    if not doc:
        return self.ts

    asset = ElementTree.parse(doc).getroot().find("asset")
    for item in asset.findall("assetItem"):
        self.ts = getTSUrl(item.find("url").text)

    return self.ts
def download(db, grabber, downType):
    """Download the zip archive at ``infoUrl`` and process its catch-up files.

    The archive is stored in ``Config.pluzzFolder``.  Every member whose
    name starts with ``"catch_up_"`` is opened, wrapped in an ASCII stream
    reader and passed to ``process``.

    Nothing is processed when ``Utils.download`` returns a falsy value;
    other callers in this file treat that result the same way.
    """
    progress = Utils.getProgress()
    name = Utils.httpFilename(infoUrl)
    folder = Config.pluzzFolder
    localName = os.path.join(folder, name)

    archive = Utils.download(
        grabber, progress, infoUrl, localName, downType, None, True)
    if not archive:
        # no archive available: ZipFile would fail on it
        return

    decoder = codecs.getreader("ascii")

    # the context manager closes the ZipFile even if process() raises
    with zipfile.ZipFile(archive, "r") as z:
        for member in z.namelist():
            if not member.startswith("catch_up_"):
                continue
            # a distinct name keeps the downloaded archive handle intact
            with z.open(member) as entry:
                process(grabber, decoder(entry), db)
def download(db, grabber, downType, mediasetType):
    """Download the configuration at ``configUrl`` and start the item download.

    The configuration is parsed as XML and converted with ``parseConfig``.
    For ``mediasetType == "tg5"`` the ``FullVideoRequestUrl`` entry is used
    (with ``http://ww.`` rewritten to ``http://www.``) together with
    ``FULL_VIDEO``; any other type uses ``ProgramListRequestUrl`` with
    ``PROGRAM_LIST``.
    """
    progress = Utils.getProgress()
    targetDir = Config.mediasetFolder
    target = os.path.join(targetDir, Utils.httpFilename(configUrl))
    raw = Utils.download(
        grabber, progress, configUrl, target, downType, None, True)

    conf = parseConfig(ElementTree.fromstring(raw.read().strip()))

    if mediasetType == "tg5":
        itemsUrl = conf["FullVideoRequestUrl"].replace(
            "http://ww.", "http://www.")
        which = FULL_VIDEO
    else:
        itemsUrl = conf["ProgramListRequestUrl"]
        which = PROGRAM_LIST

    downloadItems(
        grabber, itemsUrl, which, conf, targetDir, progress, downType, db)
def process(grabber, progress, folder, f, db, downType):
    """Download and process the page of every named ``elemento`` in ``f``.

    ``f`` is parsed as XML.  For each direct ``elemento`` child of the root
    that has a non-empty ``uniqueName`` text, the page URL is built with
    ``RAIUrls.getJuniorPage``, downloaded into ``folder`` and, when the
    download is truthy, passed to ``processPage``.
    """
    for element in ElementTree.parse(f).getroot():
        if element.tag != "elemento":
            continue

        nameNode = element.find("uniqueName")
        if nameNode is None:
            continue

        uniqueName = nameNode.text
        if not uniqueName:
            continue

        pageUrl = RAIUrls.getJuniorPage(uniqueName)
        target = os.path.join(folder, Utils.httpFilename(pageUrl))
        page = Utils.download(
            grabber, progress, pageUrl, target, downType, None, True)
        if page:
            processPage(grabber, progress, folder, page, db, downType)
def download(db, grabber, url, downType):
    """Download the data document of a page and add its contents to ``db``.

    The page name is the filename of ``url`` without its extension.  The
    data URL comes from ``RAIUrls.getPageDataUrl`` and the local copy is
    ``<page>.xml`` in ``Config.pageFolder``.  Each ``content`` child of the
    parsed XML root becomes an ``Elem`` with a fresh PID, which is added
    to ``db``.
    """
    page = os.path.splitext(Utils.httpFilename(url))[0]
    dataUrl = RAIUrls.getPageDataUrl(page)
    target = os.path.join(Config.pageFolder, page + ".xml")
    data = Utils.download(grabber, None, dataUrl, target, downType, "utf-8")

    # ElementTree does not like unicode, it prefers byte strings
    xmlText = Utils.removeInvalidXMLCharacters(data.read().strip())
    root = ElementTree.fromstring(xmlText)

    for content in root.findall("content"):
        newPid = Utils.getNewPID(db, None)
        Utils.addToDB(db, Elem(newPid, grabber, content))
def download(db, grabber, downType):
    """Download and process the replay pages of the previous seven days.

    For each of the days 1 to 7 before today and each value of
    ``channels``, the page ``<channel>_YYYY_MM_DD.html`` under
    ``RAIUrls.replay`` is downloaded into ``Config.replayFolder``.  Pages
    whose download is falsy are skipped; the others go to ``process``.
    """
    progress = Utils.getProgress()
    today = datetime.date.today()
    targetDir = Config.replayFolder

    for daysBack in range(1, 8):
        stamp = (today - datetime.timedelta(days=daysBack)).strftime(
            "_%Y_%m_%d")
        for channel in channels.values():
            pageName = channel + stamp + ".html"
            pageUrl = RAIUrls.replay + "/" + pageName
            target = os.path.join(targetDir, pageName)
            page = Utils.download(
                grabber, progress, pageUrl, target, downType, "utf-8")
            if not page:
                continue
            process(grabber, page, db)
def download(db, grabber, downType):
    """Fetch one replay page per channel for each of the last seven days.

    The days covered are yesterday back to seven days ago.  Every page is
    named ``<channel>_YYYY_MM_DD.html``, fetched from below
    ``RAIUrls.replay`` and saved in ``Config.replayFolder``.  A page is
    passed to ``process`` only when its download is truthy.
    """
    progress = Utils.getProgress()
    reference = datetime.date.today()
    replayDir = Config.replayFolder

    for offset in range(1, 8):
        day = reference - datetime.timedelta(days=offset)
        suffix = day.strftime("_%Y_%m_%d") + ".html"
        for channel in channels.values():
            filename = channel + suffix
            localName = os.path.join(replayDir, filename)
            content = Utils.download(
                grabber, progress, RAIUrls.replay + "/" + filename,
                localName, downType, "utf-8")
            if content:
                process(grabber, content, db)
def follow(self, db, downType):
    """Download and process consecutive set documents starting at ``self.url``.

    Each document is stored in ``Config.juniorFolder`` and passed to
    ``processSet``, which returns an ``(index, total)`` pair.  The walk
    stops when ``index + 1 == total`` or when the current URL contains no
    ``-V-`` marker; otherwise the next URL is built from the part before
    the last ``-V-`` and ``index + 1``.
    """
    targetDir = Config.juniorFolder
    progress = Utils.getProgress()
    currentUrl = self.url

    while True:
        target = os.path.join(targetDir, Utils.httpFilename(currentUrl))
        doc = Utils.download(
            self.grabber, progress, currentUrl, target, downType, None, True)
        index, total = processSet(
            self.grabber, progress, targetDir, doc, db, downType)

        if index + 1 == total:
            break

        # replace -V-0.xml -> -V-1.xml and so on
        marker = currentUrl.rfind("-V-")
        if marker == -1:
            break
        currentUrl = "{0}-V-{1}.xml".format(currentUrl[:marker], index + 1)