def write_schedule():
    """Run the schedule writer over every line from ``config.channels()``.

    Creates the configured output directory if it does not exist and opens
    three outputs inside it: ``schedule.txt`` and ``description.txt`` (both
    handed to ``_ScheduleWriter``) and ``missing.txt``.  Each channel line is
    echoed to stdout and passed to the writer; the line is appended to
    ``missing.txt`` when the writer returns a falsy result or when a socket
    or HTTP error is raised during the write.
    """
    out_dir = config.output_dir()
    os.makedirs(out_dir, exist_ok=True)
    schedule_path = os.path.join(out_dir, 'schedule.txt')
    summary_path = os.path.join(out_dir, 'description.txt')
    missing_path = os.path.join(out_dir, 'missing.txt')

    # ExitStack exits every context entered so far, even if a later
    # enter_context() call fails.
    with contextlib.ExitStack() as outputs:
        schedule_out = outputs.enter_context(_open_w_ext(schedule_path))
        summary_out = outputs.enter_context(_open_w_ext(summary_path))
        missing_out = outputs.enter_context(_open_w(missing_path))
        writer = _ScheduleWriter(schedule_out, summary_out)

        for raw in config.channels():
            entry = raw.rstrip()
            print(entry)
            # Skip everything up to and including the first '.' plus one
            # more character (presumably an "N. " prefix -- confirm against
            # the channel list format).  When there is no '.', find()
            # returns -1 and the slice starts at index 1.
            start = entry.find('.') + 2
            try:
                ok = writer.write(entry[start:])
            except (socket.error, http.client.HTTPException):
                # Network failures are recorded the same way as a falsy
                # result from the writer.
                ok = False
            if not ok:
                missing_out.write(entry + '\n')
# Fetch the NTV-Plus page and dump one 'text: "id"' line per channel label
# into ntvplus.txt in the configured output directory.
import os.path

import lxml.etree

from tv_schedule import config

URL = 'http://www.ntvplus.ru/tv/#genre=3494'

parser = lxml.etree.HTMLParser()
doc = lxml.etree.parse(URL, parser).getroot()

out_path = os.path.join(config.output_dir(), 'ntvplus.txt')
with open(out_path, 'w') as out:
    # Hard-coded position of the label container in the page markup.
    labels = doc[1][5][0][4][0][0][1]
    # The last two children of the container are skipped.  For each
    # remaining label, its first child supplies the id attribute and the
    # text that follows that child (its tail) supplies the name.
    out.writelines(
        '{}: "{}"\n'.format(label[0].tail, label[0].get('id'))
        for label in labels[:-2]
    )
# Probe tv.yandex.ru channel pages by numeric id and record "name: 'id'"
# in yandex.txt (and on stdout) for every id that returns a non-empty body.
import os.path

import httplib2
import lxml.etree

from tv_schedule import config

URL = 'https://tv.yandex.ru/87/channels/'

parser = lxml.etree.HTMLParser(encoding='utf-8')
http = httplib2.Http(ca_certs=r'c:\Downloads\tv.yandex.ru.crt')

out_path = os.path.join(config.output_dir(), 'yandex.txt')
with open(out_path, 'w') as out:
    for chan_id in range(1398):
        # Only the second element of the request() result (the body) is used.
        body = http.request('{}{}'.format(URL, chan_id))[1]
        if not body:
            # Empty response: nothing to record for this id.
            continue
        page = lxml.etree.fromstring(body, parser)
        # Hard-coded position of the channel title in the page markup.
        title = page[1][0][1][0][0].text
        entry = "{}: '{}'".format(title, chan_id)
        out.write(entry + '\n')
        print(entry)
# Fetch the viasat.lv programme page and write one 'name: "code"' line per
# channel link to viasat.txt (UTF-8), echoing each line to stdout.
import os.path

import lxml.etree

from tv_schedule import config

URL = 'http://www.viasat.lv/viasat0/tv-programma27/tv-programma28/_/all/'

parser = lxml.etree.HTMLParser()
root = lxml.etree.parse(URL, parser).getroot()
# Hard-coded position of the channel list in the page markup.
items = root[1][0][5][0][3][0][1]

out_path = os.path.join(config.output_dir(), 'viasat.txt')
with open(out_path, 'w', encoding='utf-8') as out:
    # The last child of the list is skipped.
    for item in items[:-1]:
        link = item[0]
        # The channel code is the second-to-last '/'-separated piece of href.
        code = link.get('href').split('/')[-2]
        name = link.text.strip()
        entry = '{}: "{}"'.format(name, code)
        out.write(entry + '\n')
        print(entry)
# Fetch the tv.ua channel page and write one 'name: "code"' line per
# 'orange'-classed element inside the element with id "channels".
import os.path

import lxml.html

from tv_schedule import config

URL = 'http://tv.ua/channels'

root = lxml.html.parse(URL).getroot()
channels = root.get_element_by_id('channels')

with open(os.path.join(config.output_dir(), 'tv_ua.txt'), 'w') as out:
    for link in channels.find_class('orange'):
        slug = os.path.basename(link.get('href'))
        # The code is whatever follows the last '-' in the href basename;
        # rindex() raises ValueError if there is no '-'.
        code = slug[slug.rindex('-') + 1:]
        out.write('{}: "{}"\n'.format(link[0].text, code))
# Query the tv.mail.ru endpoint for channel types 1..12 and write one
# 'name: "code"' line per channel to mail_ru.txt, echoing each to stdout.
import os.path
import json

import httplib2

from tv_schedule import config

_URL = 'http://tv.mail.ru/ext/admtv/?sch.main=1&sch.channel_type='
_http = httplib2.Http()

with open(os.path.join(config.output_dir(), 'mail_ru.txt'), 'w') as out:
    for type_no in range(1, 13):
        key = str(type_no)
        payload = _http.request(_URL + key)[1].decode()
        by_type = json.loads(payload)['channel_type']
        # The response is keyed by the channel-type number as a string.
        channels = by_type.get(key)
        if channels is None:
            continue
        for chan in channels:
            # The code is the third '/'-separated piece of the channel url.
            code = chan['url'].split('/')[2]
            entry = '{}: "{}"'.format(chan['name'], code)
            out.write(entry + '\n')
            print(entry)
# Fetch the tv.lv channel page and write one 'name: "data-id"' line per
# channel row to tv_lv.txt (UTF-8).
import os.path

import lxml.etree

from tv_schedule import config

URL = 'http://www.tv.lv/channels/'

parser = lxml.etree.HTMLParser()
root = lxml.etree.parse(URL, parser).getroot()

out_path = os.path.join(config.output_dir(), 'tv_lv.txt')
with open(out_path, 'w', encoding='utf-8') as out:
    # Hard-coded position of the channel rows in the page markup.
    rows = root[1][6][2][0][0][2][0][0][0][0][2][1]
    # The first row is skipped.
    for row in rows[1:]:
        cells = row.iterchildren()
        # First child: its first sub-element holds the channel name.
        name = next(cells)[0].text
        # Second child: carries the channel id in its data-id attribute.
        chan_id = next(cells).get('data-id')
        out.write('{}: "{}"\n'.format(name, chan_id))
# Fetch the tv.akado.ru channel page and write one 'name: code' line per
# channel link to akado.txt.
import os.path

import lxml.etree

from tv_schedule import config

URL = 'http://tv.akado.ru/channels'

parser = lxml.etree.HTMLParser(encoding='utf-8')
root = lxml.etree.parse(URL, parser).getroot()
# Hard-coded position of the container holding the channel tables.
container = root[1][0][4][0][0][0]

with open(os.path.join(config.output_dir(), 'akado.txt'), 'w') as out:
    # The first and last children of the container are skipped.
    for table in container[1:-1]:
        for row in table[1]:
            link = row[0][0][0]
            # The code is the href with its extension removed.
            code, _ext = os.path.splitext(link.get('href'))
            out.write("{}: {}\n".format(link.text, code))
# Scan vsetv.com weekly-schedule pages by numeric channel id and record
# "name: 'id'" in vsetv.txt (and on stdout) for every page whose logo slot
# is a <div>.
#
# Fix: the script calls os.path.join() but never imported os, so it raised
# NameError when run on its own; os.path is now imported explicitly.
import os.path
from encodings import cp866

import httplib2
import lxml.etree

from tv_schedule import config

URL = 'http://www.vsetv.com/schedule_channel_{}_week.html'
# A browser-like user-agent is sent with every request
# (presumably the site rejects unknown clients -- TODO confirm).
hdrs = {
    'user-agent':
        'Mozilla/5.0 (Windows NT 6.1; rv:9.0) Gecko/20100101 Firefox/9.0'
}
http = httplib2.Http()
parser = lxml.etree.HTMLParser()

# Print the Ukrainian 'i' to the console as a Latin 'i': the cp866 encoding
# map is patched so the Cyrillic letter encodes to the same byte.
cp866.encoding_map[ord('і')] = cp866.encoding_map[ord('i')]

with open(os.path.join(config.output_dir(), 'vsetv.txt'), 'w') as f:
    for i in range(1, 1023):
        url = URL.format(i)
        # Only the second element of the request() result (the body) is used.
        content = http.request(url, headers=hdrs)[1]
        doc = lxml.etree.fromstring(content, parser)
        table = doc[3][6]  # <!-- Base Table (1 column / 8 rows) -->
        chlogo = table[3][0][2]
        # Only pages where this slot is a <div> yield a channel entry; its
        # first child's alt attribute is used as the channel name.
        if chlogo.tag == 'div':
            channel = chlogo[0].get('alt')
            s = "{}: '{}'".format(channel, i)
            f.write(s + '\n')
            print(s)