def scrape_streaming_channels():
    """Scrape the channels the site lists as currently streaming.

    The listing is not always reliable; it is used to retrieve each
    channel's name and the event time shown next to it.

    Arguments: None

    Returns:
        dict keyed by channel number (int), e.g.
        {1: {"link": "info/channel1", "name": "21:45 : Lig TV ",
             "status": "", "number": 1, "last_check": None}, 2: {...}}

        Articles without a <time> tag or without a numbered channel link
        are skipped.
    """
    online_chans = {}
    page = bt.get_page(domain + "/HD/TV/info/home", encoding)

    for article in re.findall("<article>(.*?)</article>", page, re.DOTALL):
        # Preferred form: all-numeric <time datetime="N">HH:MM</time>.
        # "+" (not "*?") so empty captures can never reach int().
        numeric = re.search(
            r'<time datetime="([0-9]+)">([0-9]+):([0-9]+)</time>', article)
        if numeric:
            year, hour, minute = numeric.groups()
            try:
                # The datetime attribute is applied as the year, the tag
                # text as hour and minute, on top of the current moment.
                moment = datetime.datetime.now().replace(
                    year=int(year), hour=int(hour), minute=int(minute))
            except (ValueError, OverflowError):
                # Out-of-range field (hour 25, a non-leap year on Feb 29,
                # ...): show the time text as published instead of
                # aborting the whole scrape.
                event_time = "%s:%s" % (hour, minute)
            else:
                event_time = bt.localtime_from_timezone(moment, "Turkey")
        else:
            # Fallback: any <time> tag; its text is used verbatim.
            loose = re.search('<time datetime=".*?">(.*?)</time>', article)
            if not loose:
                continue
            event_time = loose.group(1)

        channel = re.search(
            r'href="channel([0-9]+)" class="pix-hover">(.*?)</a>', article)
        if not channel:
            continue

        # isinstance also accepts datetime subclasses the helper may return.
        if isinstance(event_time, datetime.datetime):
            event_time = event_time.strftime("%H:%M")

        cnum, cname = channel.groups()
        number = int(cnum)
        online_chans[number] = {
            "link": "info/channel%s" % cnum,
            "name": "%s : %s " % (event_time, cname),
            "status": "",
            "number": number,
            "last_check": None,
        }

    return online_chans
def scrape_streaming_channels():
    """Scrape the channels the site lists as currently streaming.

    The listing is not always reliable; it is used to retrieve each
    channel's event title and event time.

    Arguments: None

    Returns:
        dict keyed by channel number (int), e.g.
        {1: {"link": "/kanal1", "name": "21:45 | <rest of title> ",
             "status": "", "number": 1, "last_check": None}, 2: {...}}

        Every channel linked from the same event block shares that
        event's name. Event blocks without a title are skipped.
    """
    online_chans = {}
    page = bt.get_page(defs.domain + "/home", defs.encoding)

    for event in re.findall('<div class="top-event">(.*?)</div>',
                            page, re.DOTALL):
        heading = re.search('<h2 class="pix-post-title">(.*?)<br>',
                            event, re.DOTALL)
        if not heading:
            # No title block: nothing to label the channels with.
            continue

        # Strip nested tags, then drop the " I " separator the site uses.
        title = re.sub('<[^<]+?>', '', heading.group(1))
        title = title.replace(' I ', ' ')

        # split() collapses every run of whitespace, so no separate
        # whitespace-squeezing pass is needed.
        words = title.split()
        if not words:
            continue
        etime = words[0]
        opps = " ".join(words[1:])

        # By default the first token is shown as published.
        event_time = etime
        if ":" in etime:
            try:
                hour, minute = etime.split(":")
                moment = datetime.datetime.now().replace(
                    hour=int(hour), minute=int(minute))
            except ValueError:
                # Not a plain HH:MM (e.g. "20:45:00", "TBA:", hour 25):
                # keep the raw token rather than aborting the scrape.
                pass
            else:
                moment = bt.localtime_from_timezone(moment, "Turkey")
                event_time = moment.strftime("%H:%M")

        ctitle = "%s | %s " % (event_time, opps)

        # "+" (not "*?") so a link without a number never reaches int().
        for cnum in re.findall('<a target="webspor" href="/kanal([0-9]+)">',
                               event):
            number = int(cnum)
            online_chans[number] = {
                "link": "/kanal%s" % cnum,
                "name": ctitle,
                "status": "",
                "number": number,
                "last_check": None,
            }

    return online_chans