Пример #1
0
    def parse_epg(self, response):
        """Parse one day's schedule page and yield an ``EpgItem`` per row.

        The day being parsed is the last ``/``-separated segment of
        ``response.url``. Rows whose start time sorts before ``"06:00"`` are
        dated to the following day (as returned by ``next_day``); all other
        rows are dated to the page's own day.

        Each item gets ``name`` (title followed by the text before the first
        ``:`` in the info paragraph), ``desc`` (the synopsis, or an empty
        string when the row has none), ``starttime`` and an empty ``endtime``.

        Raises:
            CloseSpider: if the page contains no schedule rows.
        """
        content_div = response.xpath("//div[@class='schedule']/div[@class='outer-container']/"
                                     "div[@class='inner-container']/div[contains(@class,'schedule-row')]")
        if not content_div:
            raise CloseSpider('no epg info!')
        day = response.url.split("/")[-1]
        nextday = next_day(self.formats, day)
        # Loop-invariant: compile once instead of once per schedule row.
        # Captures the text of the <h4>, with or without a wrapping <a>.
        title_re = re.compile(r'\<h4.*?\>\s*(?:\<a href.*?\>)*(.*?)(?:\</a\>)*\s*\</h4\>', re.S)
        for div in content_div:
            starttime = div.xpath(".//div[contains(@class,'program-time')]/text()").extract()[0].strip()
            # NOTE(review): this is a lexical comparison, so it presumably
            # relies on zero-padded "HH:MM" values -- confirm against the site.
            if starttime < "06:00":
                starttime = trans_format(nextday + starttime, self.formats + "%H:%M")
            else:
                starttime = trans_format(day + starttime, self.formats + "%H:%M")

            # The raw <h4> markup is extracted (not its text) because the
            # title may be nested inside a link; the regex strips the tags.
            title_html = div.xpath(".//div[contains(@class,'program-info')]/h4").extract()[0].strip()
            title = title_re.search(title_html).group(1)
            meta = div.xpath(".//div[contains(@class,'program-info')]/p/text()").extract()[0]
            meta = meta.split(":")[0].strip()
            name = "%s %s" % (title, meta)

            # A row without a synopsis should not abort the whole page:
            # fall back to an empty description instead of raising IndexError.
            synopsis = div.xpath(".//div[contains(@class,'program-info')]/div[contains(@class,'program-synopsis')]/p/text()").extract()
            desc = synopsis[0] if synopsis else ''

            item = EpgItem()
            item['name'] = name
            item['desc'] = desc.strip()
            item['starttime'] = starttime
            item['endtime'] = ''
            yield item
Пример #2
0
    def parse_epg(self, response):
        """Yield an ``EpgItem`` for every programme of the "si4a" channel.

        ``response.body`` is decoded as JSON and iterated as a sequence of
        channel records; the record whose ``channelName`` equals ``"si4a"``
        supplies the programme list through its ``channels`` key. When no
        record matches, nothing is yielded (previously this raised
        ``UnboundLocalError`` because ``programs`` was never assigned).

        Each programme's ``date`` is combined with its ``start_time`` and
        ``end_time`` and passed to ``trans_format`` with the pattern
        ``"%m/%d/%Y %H:%M:%S"``. ``desc`` is always empty.
        """
        channels = json.loads(response.body)

        # Default to "no programmes" so a feed without the wanted channel
        # produces an empty result instead of crashing below.
        programs = []
        for channel in channels:
            if channel['channelName'] == "si4a":
                # No early exit: if several records match, the last one wins,
                # exactly as before.
                programs = channel['channels']

        for program in programs:
            # NOTE(review): the item name is taken from the 'genre' field --
            # confirm that this is the intended source for the title.
            name = program['genre']
            starttime = program['date'] + " " + program['start_time']
            endtime = program['date'] + " " + program['end_time']

            starttime = trans_format(starttime, "%m/%d/%Y %H:%M:%S")
            endtime = trans_format(endtime, "%m/%d/%Y %H:%M:%S")

            item = EpgItem()
            item['name'] = name
            item['starttime'] = starttime
            item['endtime'] = endtime
            item['desc'] = ''
            yield item
Пример #3
0
    def parse(self, response):
        """Yield an ``EpgItem`` for every programme listed on the page.

        The page is organised as one wrapper per date. The date is read from
        the last ten characters of the wrapper's header and converted from
        ``%d-%m-%Y`` to ``%Y.%m.%d``. For each programme, the part of the
        timing text before the first ``/`` is normalised and passed through
        ``time12to24``; the resulting timestamp is then moved forward by
        eight hours (presumably a timezone adjustment -- confirm).

        ``endtime`` and ``desc`` are always empty strings.
        """
        stamp_format = "%Y.%m.%d %H:%M:%S"
        day_wrappers = response.xpath("//div[@class='box-container-wrapper']/div[contains(@class,'date-program-wrapper')]")
        for day_wrapper in day_wrappers:
            header_text = day_wrapper.xpath("./div[@class='box-inner-container-header']/h2/text()").extract()[0]
            day = trans_format(header_text[-10:], "%d-%m-%Y", "%Y.%m.%d")

            for entry in day_wrapper.xpath("./div[@class='box-inner-container-wrapper']/div"):
                title = entry.xpath("./div[@class='title']/h2/text()").extract()[0]
                timing_text = entry.xpath("./div[@class='timing']/time/text()").extract()[0]

                # Keep only the start portion (before "/"), at most seven
                # characters, and swap ":" for "." before the 12h -> 24h step.
                start_part = timing_text.split("/")[0]
                clock = time12to24(start_part[0:7].strip().replace(":", "."))

                # trans_format's result is re-parsed with seconds, so that we
                # can do date arithmetic on it.
                local_stamp = trans_format("%s %s" % (day, clock), "%Y.%m.%d %H:%M")
                shifted = datetime.datetime.strptime(local_stamp, stamp_format) + datetime.timedelta(hours=8)

                item = EpgItem()
                item['name'] = title
                item['starttime'] = shifted.strftime(stamp_format)
                item['endtime'] = ''
                item['desc'] = ''
                yield item
Пример #4
0
    def parse_epg(self, response):
        """Yield an ``EpgItem`` for each schedule-grid entry on the page.

        The date is the last eight characters of ``response.url``. Every
        entry's time text has runs of whitespace and remaining spaces removed
        before going through ``time12to24``; date and time are then handed to
        ``trans_format`` using ``self.formats + " %H:%M"``.

        The item name is the stripped title immediately followed by the
        stripped subtitle (empty when the entry has none), with any run of
        two or more whitespace characters removed. ``endtime`` and ``desc``
        are always empty strings.
        """
        day = response.url[-8:]
        multi_space = re.compile(r"\s\s+")

        for grid in response.xpath("//div[@class='schedule_grid ']"):
            details = grid.xpath("./div[@class='schedule_details']")
            raw_time = details.xpath("./p[@class='info']/text()").extract()[0]
            main_title = details.xpath("./p[@class='title']/a/text()").extract()[0]

            # The subtitle <span> is optional; treat its absence as empty.
            try:
                sub_title = details.xpath("./p[@class='title']/a/span/text()").extract()[0]
            except (IndexError, ValueError):
                sub_title = ""

            compact_time = multi_space.sub("", raw_time).replace(" ", "")
            clock = time12to24(compact_time)
            start = trans_format("%s %s" % (day, clock), self.formats + " %H:%M")

            full_title = main_title.strip() + sub_title.strip()

            item = EpgItem()
            item["name"] = multi_space.sub("", full_title)
            item["starttime"] = start
            item["endtime"] = ""
            item["desc"] = ""
            yield item