示例#1
0
文件: vimeo.py 项目: tlang0/wstbot
    def find_info(self, url):
        """Build an info message (title and duration) for a Vimeo video URL.

        The part of *url* following ``vimeo.com/`` (up to the first
        whitespace) is taken as the video id, stored on ``self.video_id`` and
        used to fetch ``http://vimeo.com/api/v2/video/<id>.json``.  The first
        element of the decoded JSON list supplies ``"title"`` and
        ``"duration"``; the duration is rendered as minutes and seconds.

        Returns:
            ``(message, raw_title)`` where ``message`` is
            ``"<formatted title> :: <formatted duration>"`` and ``raw_title``
            is the unformatted title; ``None`` when *url* does not match.

        Errors raised while downloading or decoding the JSON document are
        not handled here and propagate to the caller.
        """
        # get video id
        # Raw string: "\." and "\S" are regex escapes, not string escapes.
        match = re.match(URL_REGEX_PREFIX + r'vimeo\.com/(\S*)', url)
        if match is None:
            return

        self.video_id = match.group(1)
        logger.info("Found vimeo video: " + self.video_id)

        json_url = "http://vimeo.com/api/v2/video/{0}.json".format(self.video_id)
        json_content = download_page(json_url).decode("utf-8")
        # The response is indexed with [0], i.e. a list whose first element
        # describes the requested video.
        json_data = json.loads(json_content)[0]

        raw_title = json_data["title"]
        title = self.msg_formats.bold(self.msg_formats.red(raw_title))

        # Integer arithmetic only; avoids the float round trip of int(x / 60).
        minutes, seconds = divmod(int(json_data["duration"]), 60)
        duration = self.msg_formats.green(str(minutes) + "m " + str(seconds) + "s")

        message = "{0} :: {1}".format(title, duration)

        return (message, raw_title)
示例#2
0
    def search_site(self, url, resource_dict):
        """Download *url* and build a message from the data its patterns match.

        Each entry of ``resource_dict["patterns"]`` is a mapping holding a
        ``"pattern"`` key (a regex whose first group is the wanted value)
        and/or an ``"xpath"`` key.  The regex is tried first and the xpath is
        the fallback.  Every non-empty value is unescaped, escaped again,
        stripped, wrapped through ``self.msg_formats.get`` (the first value
        with ``FIRST_STYLE``/``FIRST_COLOR``, later ones with
        ``REST_STYLE``/``REST_COLOR``) and the results are joined with
        ``self.sitedata["separator"]``.

        Processing stops at the first entry that has neither key or that
        yields no value; values collected up to that point are kept.

        Returns:
            ``(message, title)`` where ``title`` is the first non-empty value
            and ``message`` the joined, formatted values; both are ``None``
            when nothing was collected.  A bare ``None`` is returned when
            ``self.sitedata`` is unset or the page could not be retrieved.
        """

        if self.sitedata is None:
            return

        # retrieve content
        try:
            content = download_page(url).decode(WEB_ENCODING, "replace")
        except Exception as err:
            # Best effort: a page that cannot be fetched or decoded simply
            # yields no message.  KeyboardInterrupt/SystemExit still propagate.
            logger.warning(
                "Could not retrieve content of " + str(url) + ": " + str(err))
            return

        # One-slot cache so the document is parsed at most once, and only if
        # an xpath lookup is actually needed.
        parsed_root = []

        def info_xpath(info):
            # try to find info using xpath
            if not parsed_root:
                parsed_root.append(lxml.html.fromstring(content))
            items = parsed_root[0].xpath(info["xpath"])
            logger.debug("using xpath: " + info["xpath"])
            if not items:
                return None
            # NOTE(review): the caller treats the result as a string, so the
            # xpath presumably selects text/attribute nodes -- confirm.
            return items[0]

        def info_regex(info):
            # try to find info using a regex pattern
            logger.debug("using regex: " + info["pattern"])
            match = re.search(info["pattern"], content)
            if match is None:
                logger.warning(
                    "Could not find info! (match == None) with pattern: " +
                    info["pattern"])
                return None
            if not match.groups():
                logger.warning("Found match but no groups")
                return None

            return match.group(1)

        parts = []
        title = None

        for info in resource_dict["patterns"]:
            if "pattern" not in info and "xpath" not in info:
                logger.error(
                    "siteinfo entry does not contain a path or pattern!")
                break

            infodata = None
            # try regex first because it seems to be faster
            if "pattern" in info:
                infodata = info_regex(info)
            # try xpath if there was no pattern or regex was unsuccessful
            if infodata is None and "xpath" in info:
                infodata = info_xpath(info)

            if infodata is None:
                logger.warning("infodata was None!")
                break

            logger.debug("\ninfodata:\n")
            logger.debug(infodata)

            if infodata == "":
                continue

            logger.info("found info data: " + infodata)
            infodata = escape(unescape(infodata)).strip()
            if title is None:
                title = infodata

            # The first collected value is highlighted differently.
            if parts:
                color, style = REST_COLOR, REST_STYLE
            else:
                color, style = FIRST_COLOR, FIRST_STYLE
            parts.append(self.msg_formats.get(
                style, self.msg_formats.get(color, infodata)))

        # Joining the collected parts places separators only between values,
        # so no trailing separator has to be cut off afterwards.
        sep = self.sitedata["separator"]
        message = (" " + sep + " ").join(parts) if parts else None

        return message, title
示例#3
0
    def search_site(self, url, resource_dict):
        """Download *url* and build a message from the data its patterns match.

        Each entry of ``resource_dict["patterns"]`` is a mapping holding a
        ``"pattern"`` key (a regex whose first group is the wanted value)
        and/or an ``"xpath"`` key.  The regex is tried first and the xpath is
        the fallback.  Every non-empty value is unescaped, escaped again,
        stripped, wrapped through ``self.msg_formats.get`` (the first value
        with ``FIRST_STYLE``/``FIRST_COLOR``, later ones with
        ``REST_STYLE``/``REST_COLOR``) and the results are joined with
        ``self.sitedata["separator"]``.

        Processing stops at the first entry that has neither key or that
        yields no value; values collected up to that point are kept.

        Returns:
            ``(message, title)`` where ``title`` is the first non-empty value
            and ``message`` the joined, formatted values; both are ``None``
            when nothing was collected.  A bare ``None`` is returned when
            ``self.sitedata`` is unset or the page could not be retrieved.
        """

        if self.sitedata is None:
            return

        # retrieve content
        try:
            content = download_page(url).decode(WEB_ENCODING, "replace")
        except Exception as err:
            # Best effort: a page that cannot be fetched or decoded simply
            # yields no message.  KeyboardInterrupt/SystemExit still propagate.
            logger.warning("Could not retrieve content of " + str(url) + ": " + str(err))
            return

        # One-slot cache so the document is parsed at most once, and only if
        # an xpath lookup is actually needed.
        parsed_root = []

        def info_xpath(info):
            # try to find info using xpath
            if not parsed_root:
                parsed_root.append(lxml.html.fromstring(content))
            items = parsed_root[0].xpath(info["xpath"])
            logger.debug("using xpath: " + info["xpath"])
            if not items:
                return None
            # NOTE(review): the caller treats the result as a string, so the
            # xpath presumably selects text/attribute nodes -- confirm.
            return items[0]

        def info_regex(info):
            # try to find info using a regex pattern
            logger.debug("using regex: " + info["pattern"])
            match = re.search(info["pattern"], content)
            if match is None:
                logger.warning("Could not find info! (match == None) with pattern: " + info["pattern"])
                return None
            if not match.groups():
                logger.warning("Found match but no groups")
                return None

            return match.group(1)

        parts = []
        title = None

        for info in resource_dict["patterns"]:
            if "pattern" not in info and "xpath" not in info:
                logger.error("siteinfo entry does not contain a path or pattern!")
                break

            infodata = None
            # try regex first because it seems to be faster
            if "pattern" in info:
                infodata = info_regex(info)
            # try xpath if there was no pattern or regex was unsuccessful
            if infodata is None and "xpath" in info:
                infodata = info_xpath(info)

            if infodata is None:
                logger.warning("infodata was None!")
                break

            logger.debug("\ninfodata:\n")
            logger.debug(infodata)

            if infodata == "":
                continue

            logger.info("found info data: " + infodata)
            infodata = escape(unescape(infodata)).strip()
            if title is None:
                title = infodata

            # The first collected value is highlighted differently.
            if parts:
                color, style = REST_COLOR, REST_STYLE
            else:
                color, style = FIRST_COLOR, FIRST_STYLE
            parts.append(self.msg_formats.get(style, self.msg_formats.get(color, infodata)))

        # Joining the collected parts places separators only between values,
        # so no trailing separator has to be cut off afterwards.
        sep = self.sitedata["separator"]
        message = (" " + sep + " ").join(parts) if parts else None

        return message, title