示例#1
0
    def _parse_post_info(self, post_info_td):
        """Pull the post number, author and post date out of a post-info cell.

        Expected layout of ``post_info_td``::

            <td class="author">
              <div class="msg_stats_left">
                <span class="author_data"><a name="1"></a>#1</span>
                <span class="author_data">
                  <a href="/users/jumi/boards" class="name">
                    <b>jumi</b>
                  </a>
                </span>
                *** OPTIONAL ***
                <span class="author_data">(Topic Creator)</span>
                *** /OPTIONAL ***
                <span class="author_data">Posted 4/5/2014 1:56:08 PM</span>
                <span class="author_data">
                  <a href="/boards/2000121-anime-and-manga-other-titles/68960382/779732076">message detail</a>
                </span>
              </div>
            </td>

        NOTE: archived topics are different (creator is not in span class)

        post_info_td -- the parsed cell; it must expose ``.div.children`` whose
        items support ``.a[...]`` and ``.text``.

        A ValueError from the second date-parsing attempt is not handled here.
        """
        # Positional access: this assumes the children of the <div> are exactly
        # the <span> elements listed above, with no whitespace text nodes in
        # between -- TODO confirm against the real markup.
        post_infos = list(post_info_td.div.children)
        post_num = post_infos[0].a["name"]
        post_creator = post_infos[1].text
        try:
            # Collapse runs of whitespace so the text lines up with the format.
            dt_raw = " ".join(post_infos[2].text.split())
            post_dt = strptime(dt_raw, POST_DATE_FORMAT_STR)
        except ValueError:
            # Index 2 did not hold a date.  Per the layout above this happens
            # when the optional "(Topic Creator)" span is present, which pushes
            # the date span to index 3.
            dt_raw = " ".join(post_infos[3].text.split())
            post_dt = strptime(dt_raw, POST_DATE_FORMAT_STR)
        # NOTE(review): post_num, post_creator and post_dt are computed but
        # nothing is returned in the code visible here -- confirm whether the
        # remainder of this method is missing.
示例#2
0
    def _parse_topic(self, topic_tr):
        """Parse a topic row and return a Topic object.

        DOM layout::

            <tr class="topics">
              <td class="board_status">
                <i class="board_icon board_icon_topic"></i>
              </td>

              <td class="topic">
                <a href="**TOPIC_LINK**">**TOPIC_TITLE**</a>
                <br><span class="pglist">..</span>
              </td>
              <td class="tauthor">
                <span> <a>**USERNAME**</a></span>
              </td>

              <td class="count">23</td>

              <td class="lastpost"><a href="">4/5 3:20PM</a> </td>
            </tr>

        topic_tr -- the parsed ``<tr>`` element; it must expose ``find_all``.

        Returns a Topic built from the row, attached to ``self.board``.

        Raises AssertionError when the row does not contain exactly five
        ``<td>`` cells, and ValueError when the last-post date matches neither
        TOPIC_DATE_FORMAT_STR nor TOPIC_DATE_ALT_FORMAT_STR (or when the post
        count is not an integer).
        """
        tds = topic_tr.find_all("td")
        assert tds
        assert len(tds) == 5, "Topic html invalid format (%s)" % self.base_url()

        # get topic icon: the last CSS class of the status cell's first child
        # is the key into TOPIC_STATUS_MAP
        status_img_el = list(tds[0].children)[0]
        status_img = status_img_el["class"][-1]
        if status_img in TOPIC_STATUS_MAP:
            topic_status = TOPIC_STATUS_MAP[status_img]
        else:
            # Unknown icons are logged and treated as a normal topic.
            logger.warning("Topic status %s unknown", status_img)
            topic_status = Topic.NORMAL

        # get topic title; the id is the last path segment of the topic link
        topic_link = tds[1].a
        topic_gfaqs_id = topic_link["href"].split("/")[-1]
        topic_title = topic_link.text

        # get creator username
        username = tds[2].text.split()
        # we split because username might have (M) at the end
        if "(M)" in username[-1]:
            username = username[:-1]
        # NOTE(review): the "******" separator looks like a redaction artifact
        # (a single space seems intended) -- confirm before changing it.
        username = '******'.join(username)
        creator = User(username=username)

        # get post count of topic
        post_count = int(tds[3].text)

        # get last post date of topic
        date_raw = tds[4].a.text
        try:
            dt = strptime(date_raw, TOPIC_DATE_FORMAT_STR)
        except ValueError:
            # archived topic, use alternative format str
            dt = strptime(date_raw, TOPIC_DATE_ALT_FORMAT_STR)
        else:
            # the year is not specified on gfaqs,
            # so it is set to the current year
            # NOTE(review): around New Year a late-December post would be
            # stamped with the new year -- confirm whether that matters.
            curr_year = datetime.now().year
            dt = datetime(curr_year, dt.month, dt.day, dt.hour, dt.minute, dt.second)

        return Topic(board=self.board,
                creator=creator,
                gfaqs_id=topic_gfaqs_id,
                title=topic_title,
                number_of_posts=post_count,
                last_post_date=dt,
                status=topic_status)