def grab(channel, timespan):
    """Scrape the ERT programme listing for one channel.

    One listing page is downloaded per day (up to 14 days ahead, plus the
    previous day when the local hour is before 07:00) and each programme
    row is turned into a show dict.

    Args:
        channel: Channel id as a string; it is concatenated into the ``id``
            query parameter of the listing URL.
        timespan: Number of hours, counted from now, that should be covered.

    Returns:
        A list of show dicts in page order. Every dict has ``"start"`` (an
        aware datetime in Europe/Athens) and ``"title"``; ``"sub-title"``
        and ``"details-url"`` are added when the page provides them. The
        first show that starts more than ``timespan`` hours from now is
        still included, and the function returns right after it.
    """
    tz = pytz.timezone("Europe/Athens")
    now = datetime.datetime.now(tz)
    charset = "windows-1253"
    one_day = datetime.timedelta(days=1)
    shows = []

    # Early in the morning the previous day's page is fetched as well
    # (presumably because a listing page runs past midnight -- confirm).
    first_offset = -1 if now.hour < 7 else 0

    for offset in range(first_offset, 14):
        date = now + datetime.timedelta(days=offset)
        text = helper.download(
            "https://program.ert.gr/Ert1/index.asp?id=" + channel
            + "&pdate=" + date.strftime("%d/%m/%Y"),
            encoding=charset)
        if text is None:
            # Download failed for this day; try the next one.
            continue

        sections = helper.split(
            text,
            '<td width="50" align="center" class="table">',
            "</tr></table>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)

        for section in sections:
            time_match = re.search(r"(\d\d):(\d\d)", section)
            title_match = re.search(
                '<a class="black".*href="(.*)">(.*)</a>', section)
            if time_match is None or title_match is None:
                # Row without the expected markup: skip it instead of
                # aborting the whole grab with an AttributeError.
                continue

            # NOTE(review): replace() keeps the tzinfo attached to `now`;
            # with pytz the UTC offset is presumably not re-evaluated
            # across a DST change -- confirm.
            start = date.replace(hour=int(time_match.group(1)),
                                 minute=int(time_match.group(2)),
                                 second=0, microsecond=0)
            if start < laststart:
                # Clock time went backwards, so the listing crossed
                # midnight: this and all later rows are on the next day.
                date += one_day
                start += one_day
            laststart = start
            lastshow = (start - now).total_seconds() / 3600 > timespan

            show = {"start": start, "title": title_match.group(2)}

            subtitle = helper.cut(
                section,
                '<td width="3"></td><td><font color="#6e6868">',
                "</font>")
            if subtitle:
                show["sub-title"] = subtitle

            link = title_match.group(1)
            # startswith() is safe on an empty href, unlike link[0].
            if link.startswith("/"):
                link = "https://program.ert.gr" + link
            if link:
                show["details-url"] = link

            shows.append(show)
            if lastshow:
                return shows
    return shows
def grabdetails(url):
    """Download a programme detail page and extract description and credits.

    Args:
        url: Address of the detail page; it is fetched with the
            windows-1253 encoding.

    Returns:
        ``None`` when the download yields ``None``. Otherwise a dict that
        may contain ``"desc"``, ``"director"``, ``"presenter"``,
        ``"producer"`` and ``"writer"``; a key is only present when the
        corresponding text was found on the page.
    """
    text = helper.download(url, encoding="windows-1253")
    if text is None:
        return None

    show = {}

    # The description is the concatenation of every justified text block.
    description = "".join(
        helper.split(text, "<div align=\"justify\" class=\"black\">", "</div>"))
    if description:
        show["desc"] = helper.cleanup(description)

    # Each credit is a Greek label followed by ": value", where the value
    # runs up to the next newline or <br>.
    value_pattern = ": (.*?)(?:\n|<br>)"
    credit_labels = (
        ("director", "Σκηνοθεσία</b>"),
        ("presenter", "Παρουσίαση</b>"),
        ("producer", "Οργάνωση παραγωγής"),
        ("writer", "Αρχισυνταξία"),
    )
    for key, label in credit_labels:
        found = re.search(label + value_pattern, text)
        if found is not None:
            show[key] = helper.cleanup(found.group(1))

    return show
def channellist():
    """Return the channels listed on the ARD programme start page.

    Returns:
        A list of 3-tuples ``(sender, name, name)``, where ``name`` is the
        text following ``Tagesprogramm::`` and ``sender`` is the value of
        the ``sender`` query parameter (without a leading ``-``). The list
        is empty when the page cannot be downloaded; entries that do not
        match the expected markup are skipped.
    """
    text = helper.download("http://programm.ard.de/")
    if text is None:
        # The grab functions treat a None download as "no data"; do the
        # same here instead of passing None on to helper.split.
        return []

    result = []
    for channel in helper.split(text, "Tagesprogramm::", "</a>"):
        match = re.search(
            r'Tagesprogramm::(.*?)".*\?sender\=-?(.*?)\&', channel)
        if match is None:
            continue
        name = match.group(1)
        result.append((match.group(2), name, name))
    return result
def channellist():
    """Return the channels listed on ishow.gr, ordered by numeric id.

    Returns:
        A list of 3-tuples ``(cid, name, name)``, where ``cid`` is the
        value of the ``cid`` query parameter and ``name`` is the link
        text. The list is sorted by ``int(cid)``. It is empty when the
        page cannot be downloaded; entries that do not match the expected
        markup are skipped.

    Raises:
        ValueError: If a captured ``cid`` is not an integer string (raised
            by the sort key, as before).
    """
    text = helper.download("http://www.ishow.gr/tvNow.asp")
    if text is None:
        # The grab functions treat a None download as "no data"; do the
        # same here instead of passing None on to helper.split.
        return []

    result = []
    for channel in helper.split(
            text, '<b><a style="color:#E1D8BE"', "</a>"):
        match = re.search(r'\?cid=(.*?)">(.*)</a>', channel)
        if match is None:
            continue
        name = match.group(2)
        result.append((match.group(1), name, name))

    result.sort(key=lambda entry: int(entry[0]))
    return result
def grab(channel, timespan):
    """Scrape the ishow.gr programme listing for one channel.

    One page is downloaded per day offset (up to 5 days ahead, plus the
    previous day when the local hour is before 04:00) and each programme
    row is turned into a show dict.

    Args:
        channel: Channel id as a string; it is concatenated into the
            ``cid`` query parameter.
        timespan: Number of hours, counted from now, that should be covered.

    Returns:
        A list of show dicts in page order. Every dict has ``"start"`` (an
        aware datetime in Europe/Athens) and ``"title"``; ``"sub-title"``
        and ``"details-url"`` are added when the page provides them. The
        first show that starts more than ``timespan`` hours from now is
        still included, and the function returns right after it.
    """
    tz = pytz.timezone("Europe/Athens")
    now = datetime.datetime.now(tz)
    one_day = datetime.timedelta(days=1)
    shows = []

    # Early in the morning the previous day's page is fetched as well
    # (presumably because a listing page runs past midnight -- confirm).
    first_offset = -1 if now.hour < 4 else 0

    for offset in range(first_offset, 6):
        date = now + datetime.timedelta(days=offset)
        # The site addresses days by offset from today, not by date.
        text = helper.download(
            "http://ishow.gr/showTodayChannelProgramm.asp?cid=" + channel
            + "&gotoDay=" + str(offset))
        if text is None:
            continue

        sections = helper.split(text, '<tr id="progTr', "</tr>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)

        for section in sections:
            time_match = re.search(
                r'<td class="progTd progTdTime".*?>(\d\d):(\d\d)', section)
            title_match = re.search(
                r'<div class="grandTitle".*>(.+)\s*?</div>', section)
            if time_match is None or title_match is None:
                # Row without the expected markup: skip it instead of
                # aborting the whole grab with an AttributeError.
                continue

            # NOTE(review): replace() keeps the tzinfo attached to `now`;
            # with pytz the UTC offset is presumably not re-evaluated
            # across a DST change -- confirm.
            start = date.replace(hour=int(time_match.group(1)),
                                 minute=int(time_match.group(2)),
                                 second=0, microsecond=0)
            if start < laststart:
                # Clock time went backwards, so the listing crossed
                # midnight: this and all later rows are on the next day.
                date += one_day
                start += one_day
            laststart = start
            lastshow = (start - now).total_seconds() / 3600 > timespan

            show = {
                "start": start,
                "title": helper.cleanup(title_match.group(1)),
            }

            subtitle = helper.cut(section, '<div class="subTitle">', "</div>")
            if subtitle:
                show["sub-title"] = helper.cleanup(subtitle)

            link_match = re.search(
                '<div class="grandTitle">.*?href="(.*?)"', section)
            if link_match is not None:
                show["details-url"] = "http://ishow.gr" + link_match.group(1)

            shows.append(show)
            if lastshow:
                return shows
    return shows
def getConstraintsForAll(dataTensor, variables, orderingNotImp):
    """Collect sum and consecutive-run bounds for every (m, s) split.

    The index set ``0 .. len(variables) - 1`` is handed to ``helper.split``,
    which yields pairs ``(m, s)``. For each pair the bounds returned by
    ``helper.tensorSum`` (and, under the condition below, by
    ``helper.tensorConsZero``) are stored in a row dict.

    Args:
        dataTensor: Data passed through to ``helper.tensorIndicator``.
        variables: Sequence indexed by dimension; ``len(variables[d])`` is
            used as the size of dimension ``d``.
        orderingNotImp: Iterable of dimension indices for which the
            consecutive-run bounds are not computed.

    Returns:
        A dict keyed by ``"<m joined by ','>:<s joined by ','>"``. Each
        value is a dict with the keys ``minSum``, ``maxSum``,
        ``minConsZero``, ``maxConsZero``, ``minConsNonZero`` and
        ``maxConsNonZero``; a bound that is not below the product of the
        sizes of the dimensions in ``s`` is stored as 0.
    """
    repeatDim = ()
    free_dims = {d for d in range(len(variables)) if d not in repeatDim}
    constraints = {}

    for m, s in helper.split(free_dims, (), repeatDim):
        newset = m + s

        # this value will be used to filter max constraints
        maxPossible = 1
        for dim in s:
            maxPossible *= len(variables[dim])

        idTensor = helper.tensorIndicator(dataTensor, newset, variables)
        # Positions of the s-dimensions inside newset (they follow m).
        sumSet = range(len(m), len(newset))

        sumTensor_max, sumTensor_min = helper.tensorSum(
            idTensor, sumSet, np.array(variables)[list(newset)], 0)

        # Run-length bounds are only requested when s covers exactly one
        # dimension and that dimension is not listed in orderingNotImp.
        if len(set(s)) == 1 and not (set(orderingNotImp) & set(s)):
            run_bounds = helper.tensorConsZero(
                idTensor, sumSet, np.array(variables)[list(newset)])
            minConsZero, maxConsZero, minConsNonZero, maxConsNonZero = run_bounds
        else:
            minConsZero = maxConsZero = minConsNonZero = maxConsNonZero = 0

        raw_bounds = (
            ("minSum", sumTensor_min),
            ("maxSum", sumTensor_max),
            ("minConsZero", minConsZero),
            ("maxConsZero", maxConsZero),
            ("minConsNonZero", minConsNonZero),
            ("maxConsNonZero", maxConsNonZero),
        )
        row = {
            name: (int(value) if value < maxPossible else 0)
            for name, value in raw_bounds
        }

        key = ",".join(map(str, m)) + ":" + ",".join(map(str, s))
        constraints[key] = row

    return constraints
def grab(channel, timespan):
    """Scrape the ZDF live-TV timeline for one channel.

    One page is downloaded per day (up to 14 days ahead, plus the previous
    day when the local hour is before 07:00); the ``<section>`` belonging
    to ``channel`` is cut out and each ``<li>`` entry is turned into a
    show dict.

    Args:
        channel: Suffix of the timeline CSS class
            (``b-epg-timeline timeline-<channel>``), as a string.
        timespan: Number of hours, counted from now, that should be covered.

    Returns:
        A list of show dicts in page order. Every dict has ``"start"`` and
        ``"stop"`` (aware datetimes in Europe/Berlin) and ``"title"``;
        ``"details-url"`` is added when a ``contentUrl`` is found. The
        function returns as soon as a show starts more than ``timespan``
        hours from now; that show is not included.
    """
    tz = pytz.timezone("Europe/Berlin")
    now = datetime.datetime.now(tz)
    one_day = datetime.timedelta(days=1)
    shows = []

    # Early in the morning the previous day's page is fetched as well
    # (presumably because a listing page runs past midnight -- confirm).
    first_offset = -1 if now.hour < 7 else 0

    for offset in range(first_offset, 14):
        date = now + datetime.timedelta(days=offset)
        text = helper.download("http://www.zdf.de/live-tv?airtimeDate="
                               + date.strftime("%Y-%m-%d"))
        if text is None:
            continue

        text = helper.cut(
            text, '<section class="b-epg-timeline timeline-' + channel,
            "</section>")
        if not text:
            # No timeline for this channel on the page; nothing to split.
            continue

        sections = helper.split(text, "<li", "</li>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)

        for section in sections:
            time_text = helper.cut(section, '<span class="time">', "</span>")
            times = None
            if time_text:
                times = re.search(r"(\d\d):(\d\d) - (\d\d):(\d\d)", time_text)
            title_match = re.search(
                r'<span class="overlay-link-category">(.*?)'
                r'<span class="visuallyhidden">:</span></span>'
                r'\s*(?:<.*>)*\s*(.*?)\s*?</a>',
                section)
            if times is None or title_match is None:
                # Entry without the expected markup: skip it instead of
                # aborting the whole grab.
                continue

            # NOTE(review): replace() keeps the tzinfo attached to `now`;
            # with pytz the UTC offset is presumably not re-evaluated
            # across a DST change -- confirm.
            start = date.replace(hour=int(times.group(1)),
                                 minute=int(times.group(2)),
                                 second=0, microsecond=0)
            if start < laststart:
                # Clock time went backwards, so the listing crossed
                # midnight: this and all later entries are on the next day.
                date += one_day
                start += one_day
            if (start - now).total_seconds() / 3600 > timespan:
                return shows
            laststart = start

            stop = date.replace(hour=int(times.group(3)),
                                minute=int(times.group(4)),
                                second=0, microsecond=0)
            if stop < start:
                # The show runs past midnight.
                stop += one_day

            category = title_match.group(1)
            name = title_match.group(2)
            if category:
                title = helper.cleanup(category + " - " + name)
            else:
                title = helper.cleanup(name)

            show = {"start": start, "stop": stop, "title": title}

            url_match = re.search('contentUrl": "(.*)"', section)
            if url_match is not None:
                show["details-url"] = "http://www.zdf.de" + url_match.group(1)

            shows.append(show)
    return shows
def grab(channel, timespan):
    """Scrape the ARD programme listing for one channel.

    One listing page is downloaded per day (up to 14 days ahead, plus the
    previous day when the local hour is before 07:00) and each
    ``<li class="eid...">`` entry is turned into a show dict.

    Args:
        channel: Sender id as a string; it is concatenated into the
            ``sender`` query parameter.
        timespan: Number of hours, counted from now, that should be covered.

    Returns:
        A list of show dicts in page order. Every dict has ``"start"`` (an
        aware datetime in Europe/Berlin) and ``"title"``; ``"sub-title"``
        and ``"details-url"`` are added when the page provides them. The
        first show that starts more than ``timespan`` hours from now is
        still included, and the function returns right after it.
    """
    tz = pytz.timezone("Europe/Berlin")
    now = datetime.datetime.now(tz)
    one_day = datetime.timedelta(days=1)
    shows = []

    # Early in the morning the previous day's page is fetched as well
    # (presumably because a listing page runs past midnight -- confirm).
    first_offset = -1 if now.hour < 7 else 0

    for offset in range(first_offset, 14):
        date = now + datetime.timedelta(days=offset)
        text = helper.download(
            "http://programm.ard.de/TV/Programm/Sender?datum="
            + date.strftime("%d.%m.%Y") + "&hour=0&sender=" + channel)
        if text is None:
            continue

        sections = helper.split(text, '<li class="eid', "</li>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)

        for section in sections:
            time_match = re.search(
                r'<span class="date[\s\S]*?(\d\d):(\d\d)', section)
            title_match = re.search(
                r'<span class="title[\s\S]*?>\s*([^<]*?)[\t\n]', section)
            if time_match is None or title_match is None:
                # Entry without the expected markup: skip it instead of
                # aborting the whole grab with an AttributeError.
                continue

            # NOTE(review): replace() keeps the tzinfo attached to `now`;
            # with pytz the UTC offset is presumably not re-evaluated
            # across a DST change -- confirm.
            start = date.replace(hour=int(time_match.group(1)),
                                 minute=int(time_match.group(2)),
                                 second=0, microsecond=0)
            if start < laststart:
                # Clock time went backwards, so the listing crossed
                # midnight: this and all later entries are on the next day.
                date += one_day
                start += one_day
            laststart = start
            lastshow = (start - now).total_seconds() / 3600 > timespan

            show = {
                "start": start,
                "title": helper.cleanup(title_match.group(1)),
            }

            subtitle_match = re.search(
                r'<span class="subtitle[\s\S]*?>\s*([^<]*?)[\t\n]', section)
            if subtitle_match is not None and subtitle_match.group(1):
                show["sub-title"] = helper.cleanup(subtitle_match.group(1))

            link_match = re.search(
                r'<a class="sendungslink[\s\S]*?href="(.*?)"', section)
            if link_match is not None:
                show["details-url"] = (
                    "http://programm.ard.de" + link_match.group(1))

            shows.append(show)
            if lastshow:
                return shows
    return shows
def grab(channel, timespan):
    """Scrape the Deutsche Welle EPG for one channel.

    The EPG endpoint is queried once per 4-hour step (14400 s) from the
    current time until ``timespan`` is covered. Shows whose start is not
    later than the last accepted start are dropped, so overlapping
    responses do not produce duplicates.

    Args:
        channel: Value of the ``data-channel-id`` attribute to extract,
            as a string.
        timespan: Number of hours, counted from now, that should be
            covered. Non-integer values are accepted.

    Returns:
        A list of show dicts ordered by start time. Every dict has
        ``"start"`` and ``"stop"`` (aware UTC datetimes) and ``"title"``;
        ``"url"``, ``"desc"`` and ``"icon"`` (a ``(src, {"width": ...,
        "height": ...})`` tuple) are added when present. The function
        returns as soon as a show starts more than ``timespan`` hours from
        now; that show is not included.
    """
    tz = pytz.timezone("UTC")
    now = datetime.datetime.now(tz)
    one_day = datetime.timedelta(days=1)
    time_format = "%Y-%m-%d%H:%M"
    channel_marker = 'data-channel-id="' + channel + '"'
    shows = []
    laststart = datetime.datetime.min.replace(tzinfo=tz)

    # int() keeps range() working when timespan is a float.
    for step in range(1 + int(timespan // 4)):
        timestamp = int(time.time()) + step * 14400
        # The endpoint takes the timestamp with three digits appended
        # (presumably milliseconds -- confirm).
        text = helper.download(
            "https://www.dw.com/epg/data/4765/1/" + str(timestamp) + "000")
        if text is None:
            continue

        channeldata = helper.cut(text, channel_marker, "data-channel-id")
        if not channeldata:
            # No following channel block: take everything after the marker.
            parts = text.split(channel_marker)
            if len(parts) < 2:
                continue
            channeldata = parts[1]

        sections = helper.split(
            channeldata, '<div class="epgProgram"',
            '<div class="broadcastlinks">')
        for section in sections:
            day = helper.cut(section, 'data-day="', '"')
            begintime = helper.cut(section, 'data-begin-time="', '"')
            endtime = helper.cut(section, 'data-end-time="', '"')
            if day is None or begintime is None or endtime is None:
                # Entry without the timing attributes: skip it instead of
                # failing on None + str below.
                continue

            start = pytz.utc.localize(
                datetime.datetime.strptime(day + begintime, time_format))
            if start <= laststart:
                # Already seen in a previous (overlapping) response.
                continue
            if (start - now).total_seconds() / 3600 > timespan:
                return shows
            laststart = start

            stop = pytz.utc.localize(
                datetime.datetime.strptime(day + endtime, time_format))
            if stop < start:
                # The show runs past midnight.
                stop += one_day

            show = {
                "start": start,
                "stop": stop,
                "title": helper.cleanup(
                    helper.cut(section, '<h2 class="title">', "</h2>")),
            }

            url = helper.cut(section, '<a href="', '">')
            if url:
                show["url"] = "https://www.dw.com" + url

            description = helper.cleanup(
                helper.cut(section, '<ul class="topics">', "</ul>"))
            if description:
                show["desc"] = description

            icon_match = re.search(r"<img[\s\S]*?/>", section)
            if icon_match is not None:
                try:
                    icon = icon_match.group(0)
                    width = helper.cut(icon, 'width="', '"')
                    height = helper.cut(icon, 'height="', '"')
                    src_path = helper.cut(icon, 'src="', '"')
                    # Without a src there is no usable icon; previously a
                    # None here raised an uncaught TypeError.
                    if src_path is not None:
                        show["icon"] = (
                            "https://www.dw.com" + src_path,
                            {"width": width, "height": height},
                        )
                except (AttributeError, IndexError):
                    # The icon is optional; keep the show without it.
                    pass

            shows.append(show)
    return shows
def split_file(path):
    """Split the file at ``path`` into pieces written under ``split/``.

    The destination is ``split/<last '/'-separated component of path>``.
    The work is delegated to ``split(path, dest, chunk_size)`` with a
    chunk size of 256 * 1024 (presumably bytes -- confirm against
    ``split``).

    Args:
        path: Source file path as a string, using ``/`` as the separator.
    """
    chunk_size = 256 * 1024
    filename = path.rsplit('/', 1)[-1]
    dest = 'split/' + filename
    split(path, dest, chunk_size)