Exemplo n.º 1
0
    def parse_author_id(self):  # TODO: this logic is duplicated elsewhere and could be shared
        u"""
        Extract the author's ID and store it in ``self.info['author_id']``.

        Looks up the links matching ``div.blognavInfo span a`` in ``self.dom``,
        reads the ``href`` of the second link and passes it to
        ``Match.SinaBlog_profile``; the ``SinaBlog_people_id`` group of the
        result is stored. If the link, its ``href`` or the match is missing,
        a debug message is logged and ``self.info`` is left untouched.

        :return: None
        """
        links = self.dom.select('div.blognavInfo span a')
        author_id_href = None
        # links[0] is the home page link, so the profile link is links[1].
        # Require at least two links so a sparse page cannot raise IndexError.
        if links and len(links) > 1:
            author_id_href = ParserTools.get_attr(links[1], 'href')

        if not author_id_href:
            Debug.logger.debug(u"没有找到author_id")
            return

        # NOTE(review): presumably returns a regex match object or None -- confirm
        # against Match.SinaBlog_profile.
        result = Match.SinaBlog_profile(author_id_href)
        if not result:
            # The href did not look like a profile URL; avoid calling .group on None.
            Debug.logger.debug(u"没有找到author_id")
            return
        self.info['author_id'] = result.group('SinaBlog_people_id')
Exemplo n.º 2
0
    def parse_creator_id(self):
        u"""
        Extract the creator's ID and store it in ``self.info['creator_id']``.

        Looks up the links matching ``div.blognavInfo span a`` in ``self.dom``,
        reads the ``href`` of the second link and passes it to
        ``Match.SinaBlog_profile``; the ``SinaBlog_people_id`` group of the
        result is stored. If the link, its ``href`` or the match is missing,
        a debug message is logged and ``self.info`` is left untouched.

        :return: None
        """
        links = self.dom.select('div.blognavInfo span a')
        # links[0] is the home page link, so the profile link is links[1].
        # Require at least two links so a sparse page cannot raise IndexError.
        if not links or len(links) < 2:
            Debug.logger.debug(u"没有找到creator_id")
            return

        creator_id_href = ParserTools.get_attr(links[1], 'href')
        if not creator_id_href:
            Debug.logger.debug(u"没有找到creator_id")
            return

        # NOTE(review): presumably returns a regex match object or None -- confirm
        # against Match.SinaBlog_profile.
        result = Match.SinaBlog_profile(creator_id_href)
        if not result:
            # The href did not look like a profile URL; avoid calling .group on None.
            Debug.logger.debug(u"没有找到creator_id")
            return
        self.info['creator_id'] = result.group('SinaBlog_people_id')