Пример #1
0
    def __init__(self, div):
        """
        Parse one recommendation element into this object.

        Sets ``self.recommended_anime`` and fills ``self.recommends``, a
        mapping of account -> recommendation text.

        :param div: The div of the recommendation to parse all the data from.
        :type div: bs4.element.Tag
        :exception exceptions.FailedToReloadError: when the second cell holds
            neither 3 nor 5 direct child divs.
        """
        from pymal import account, anime

        # The first table row must hold exactly two direct <td> cells.
        anime_cell, reviews_cell = div.table.tbody.tr.findAll(
            name="td", recursive=False)

        # The anime id is the third '/'-separated piece of the link target.
        anime_id = int(anime_cell.div.a["href"].split('/')[2])
        self.recommended_anime = anime.Anime(anime_id)

        sections = reviews_cell.findAll(name="div", recursive=False)
        section_count = len(sections)
        if section_count == 3:
            # Only a single recommendation is present.
            entries = [sections[2]]
        elif section_count == 5:
            # The first recommendation stands alone; the rest are nested one
            # level deeper inside the fifth div.
            entries = [sections[2]]
            entries.extend(sections[4].findAll(name="div", recursive=False))
        else:
            raise exceptions.FailedToReloadError(
                "Unknown size of data: " + str(section_count))

        self.recommends = {}
        for entry in entries:
            # Each entry must hold exactly two direct divs: text, then user.
            text_div, user_div = entry.findAll(name="div", recursive=False)
            profile_link = user_div.find(name='a', recursive=False)
            username = profile_link["href"].split('/')[2]
            self.recommends[account.Account(username)] = text_div.text
Пример #2
0
    def my_reload(self):
        """
        Reloading the user-specific data of this manga from MAL.

        Fetches ``self.__my_mal_url`` with ``self._account.auth_connect`` and
        reads the rows of the ``mangaForm`` table one after another, starting
        at row index 2, storing each parsed value in its ``self.__my_*``
        attribute.  ``self._is_my_loaded`` is set to ``True`` at the end.

        :exception exceptions.FailedToReloadError: when the page holds a
            ``badresult`` div or one of the checked nodes is missing.
        """
        from pymal import global_functions

        # Getting content wrapper <div>
        content_wrapper_div = global_functions.get_content_wrapper_div(
            self.__my_mal_url, self._account.auth_connect)

        bad_result = content_wrapper_div.find(name='div',
                                              attrs={'class': 'badresult'})
        if bad_result is not None:
            raise exceptions.FailedToReloadError(bad_result)

        # Getting content <td>
        content_div = content_wrapper_div.find(name="div",
                                               attrs={"id": "content"},
                                               recursive=False)
        if content_div is None:
            raise exceptions.FailedToReloadError(content_wrapper_div)
        content_td = content_div.table.tr.td
        if content_td is None:
            raise exceptions.FailedToReloadError(content_div)

        # Getting content rows <tr>
        content_form = content_td.find(name="form", attrs={'id': "mangaForm"})
        if content_form is None:
            raise exceptions.FailedToReloadError(content_td)
        content_rows = content_form.table.tbody.findAll(name="tr",
                                                        recursive=False)

        # Running index of the form row being parsed; every section below
        # advances it by one once it is done with its row.
        contents_divs_index = 2

        # Getting my_status
        status_select = content_rows[contents_divs_index].find(
            name="select", attrs={"id": "status", "name": "status"})
        if status_select is None:
            raise exceptions.FailedToReloadError(content_rows)

        # Exactly one <option> must carry a 'selected' attribute.
        status_selected_options = [
            option for option in status_select.findAll(name="option")
            if 'selected' in option.attrs
        ]
        if 1 != len(status_selected_options):
            raise exceptions.FailedToReloadError(status_selected_options)
        self.__my_status = int(status_selected_options[0]['value'])

        # The rereading checkbox lives in the same row as the status.
        is_reread_node = content_rows[contents_divs_index].find(
            name="input", attrs={"id": "rereadingBox"})
        if is_reread_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        # NOTE(review): bool() of any non-empty string is True, so this only
        # tells whether the 'value' attribute is non-empty - confirm whether
        # the 'checked' attribute was meant to be inspected instead.
        self.__my_is_rereading = bool(is_reread_node['value'])
        contents_divs_index += 1

        # Getting read volumes
        read_input = content_rows[contents_divs_index].\
            find(name="input", attrs={"id": "vol_read",
                                      "name": "vol_read"})
        if read_input is None:
            raise exceptions.FailedToReloadError(content_rows)
        self.__my_completed_volumes = int(read_input['value'])
        contents_divs_index += 1

        # Getting read chapters
        read_input = content_rows[contents_divs_index].\
            find(name="input", attrs={"id": "chap_read",
                                      "name": "chap_read"})
        if read_input is None:
            raise exceptions.FailedToReloadError(content_rows)
        self.__my_completed_chapters = int(read_input['value'])
        contents_divs_index += 1

        # Getting my_score
        score_select = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "score"})
        if score_select is None:
            raise exceptions.FailedToReloadError(content_rows)
        score_selected_option = score_select.find(name="option",
                                                  attrs={"selected": ""})
        if score_selected_option is None:
            raise exceptions.FailedToReloadError(content_rows)
        # Goes through float() first so a value such as "7.0" is accepted.
        self.__my_score = int(float(score_selected_option['value']))
        contents_divs_index += 1

        # Getting my_tags...
        tag_content = content_rows[contents_divs_index]
        tag_textarea = tag_content.find(name="textarea",
                                        attrs={"name": "tags"})
        # Guarded like every other node so a missing textarea is reported as
        # FailedToReloadError instead of an AttributeError on None.
        if tag_textarea is None:
            raise exceptions.FailedToReloadError(content_rows)
        self.__my_tags = frozenset(
            tag_textarea.text.split(self.__TAG_SEPARATOR))
        contents_divs_index += 1

        # Getting start date
        start_month_date_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "startMonth"})
        if start_month_date_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        start_month_date = start_month_date_node.find(name="option",
                                                      attrs={"selected": ""})

        start_day_date_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "startDay"})
        if start_day_date_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        start_day_date = start_day_date_node.find(name="option",
                                                  attrs={"selected": ""})

        start_year_date_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "startYear"})
        if start_year_date_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        start_year_date = start_year_date_node.find(name="option",
                                                    attrs={"selected": ""})

        # The date is stored as one string: month + day + year, each part
        # left-padded with zeros to at least two characters.
        start_month_date = str(start_month_date['value']).zfill(2)
        start_day_date = str(start_day_date['value']).zfill(2)
        start_year_date = str(start_year_date['value']).zfill(2)
        self.__my_start_date = start_month_date + \
            start_day_date + start_year_date
        contents_divs_index += 1

        # Getting end date
        end_month_date_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "endMonth"})
        if end_month_date_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        end_month_date = end_month_date_node.find(name="option",
                                                  attrs={"selected": ""})

        end_day_date_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "endDay"})
        if end_day_date_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        end_day_date = end_day_date_node.find(name="option",
                                              attrs={"selected": ""})

        end_year_date_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "endYear"})
        if end_year_date_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        end_year_date = end_year_date_node.find(name="option",
                                                attrs={"selected": ""})

        # Same month + day + year layout as the start date.
        end_month_date = str(end_month_date['value']).zfill(2)
        end_day_date = str(end_day_date['value']).zfill(2)
        end_year_date = str(end_year_date['value']).zfill(2)
        self.__my_end_date = end_month_date + end_day_date + end_year_date
        contents_divs_index += 1

        # Getting priority
        priority_node = content_rows[contents_divs_index].find(
            name="select", attrs={"name": "priority"})
        if priority_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        selected_priority_node = priority_node.find(name="option",
                                                    attrs={"selected": ""})
        if selected_priority_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        self.__my_priority = int(selected_priority_node['value'])
        contents_divs_index += 1

        # Getting storage
        storage_type_node = content_rows[contents_divs_index].find(
            name="select", attrs={"id": "storageSel"})
        if storage_type_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        selected_storage_type_node = storage_type_node.find(
            name="option", attrs={"selected": ""})
        # No selected option is not an error here: it falls back to 0.
        if selected_storage_type_node is None:
            self.__my_storage_type = 0
        else:
            self.__my_storage_type = int(selected_storage_type_node['value'])
        contents_divs_index += 1

        # Getting downloaded chapters
        downloaded_chapters_node = content_rows[contents_divs_index].\
            find(name="input", attrs={'id': "dChap",
                                      'name': 'downloaded_chapters'})
        if downloaded_chapters_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        # Assignment, not comparison: the parsed value has to be stored.
        self.__my_downloaded_chapters = int(downloaded_chapters_node['value'])
        contents_divs_index += 1

        # Getting times reread
        times_reread_node = content_rows[contents_divs_index].find(
            name="input", attrs={'name': 'times_read'})
        # The node must be checked before it is dereferenced.
        if times_reread_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        self.__my_times_reread = int(times_reread_node['value'])
        contents_divs_index += 1

        # Getting reread value
        reread_value_node = content_rows[contents_divs_index].find(
            name="select", attrs={'name': 'reread_value'})
        if reread_value_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        reread_value_option = reread_value_node.find(name='option',
                                                     attrs={'selected': ''})
        # No selected option is not an error here: it falls back to 0.
        if reread_value_option is None:
            self.__my_reread_value = 0
        else:
            self.__my_reread_value = int(reread_value_option['value'])
        contents_divs_index += 1

        # Getting comments
        comment_content = content_rows[contents_divs_index]
        comment_textarea = comment_content.find(name="textarea",
                                                attrs={"name": "comments"})
        # Guarded like every other node so a missing textarea is reported as
        # FailedToReloadError instead of an AttributeError on None.
        if comment_textarea is None:
            raise exceptions.FailedToReloadError(content_rows)
        self.__my_comments = comment_textarea.text
        contents_divs_index += 1

        # Getting discuss flag - only its presence is verified, the value is
        # not stored anywhere.
        discuss_node = content_rows[contents_divs_index].find(
            name='input', attrs={"name": "discuss"})
        if discuss_node is None:
            raise exceptions.FailedToReloadError(content_rows)
        self._is_my_loaded = True
Пример #3
0
    def reload(self):
        """
        Reload the general data of this manga from its MAL page.

        Fetches the page at ``self.__mal_url`` and fills, in this order: the
        title, the image url, the English / synonyms / Japanese names, type,
        volumes, chapters, status, start and end time, genres, creators,
        score, rank, popularity and synopsis.  It then follows the
        'More reviews' and 'More recommendations' links and finally sets
        ``self._is_loaded`` to ``True``.

        :exception exceptions.FailedToReloadError: when an expected part of
            the page is missing or is not laid out as expected.
        """
        import os
        from pymal import exceptions

        # Getting content wrapper <div>
        content_wrapper_div = global_functions.get_content_wrapper_div(
            self.__mal_url, global_functions.connect)

        # Getting title <div>
        # The title is the second child of the <h1>, stripped of whitespace.
        self.__title = content_wrapper_div.h1.contents[1].strip()

        # Getting content <div>
        content_div = content_wrapper_div.find(name="div",
                                               attrs={"id": "content"},
                                               recursive=False)

        if content_div is None:
            raise exceptions.FailedToReloadError(content_wrapper_div)

        content_table = content_div.table

        # The direct <td> cells of the first row: [0] is the side content,
        # [1] is the main content.
        contents = content_table.tbody.tr.findAll(name="td", recursive=False)

        # Data from side content
        side_content = contents[0]
        side_contents_divs = side_content.findAll(name="div", recursive=False)

        # Getting manga image url <img>
        img_div = side_contents_divs[0]
        img_link = img_div.find(name="a")
        if img_link is None:
            raise exceptions.FailedToReloadError(content_wrapper_div)
        self.__image_url = img_link.img['src']

        # Running index into side_contents_divs.  Parsing of the labelled
        # entries starts at the fourth div.
        # NOTE(review): divs 1 and 2 are never read here - presumably they
        # hold no data of interest; confirm against the page layout.
        side_contents_divs_index = 3

        # english <div>
        # Optional: the index only advances when the div is really there.
        english_div = side_contents_divs[side_contents_divs_index]
        if global_functions.check_side_content_div('English', english_div):
            english_span, self_english = english_div.contents
            self.__english = self_english.strip()
            side_contents_divs_index += 1
        else:
            self.__english = ''

        # synonyms <div>
        # Optional, same handling as the English name.
        synonyms_div = side_contents_divs[side_contents_divs_index]
        if global_functions.check_side_content_div('Synonyms', synonyms_div):
            synonyms_span, self_synonyms = synonyms_div.contents
            self.__synonyms = self_synonyms.strip()
            side_contents_divs_index += 1
        else:
            self.__synonyms = ''

        # japanese <div>
        # Optional, same handling as the English name.
        japanese_div = side_contents_divs[side_contents_divs_index]
        if global_functions.check_side_content_div('Japanese', japanese_div):
            japanese_span, self_japanese = japanese_div.contents
            self.__japanese = self_japanese.strip()
            side_contents_divs_index += 1
        else:
            self.__japanese = ''

        # From here on every entry is mandatory: a failed check raises.

        # type <div>
        type_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Type', type_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        type_span, self_type = type_div.contents
        self.__type = self_type.strip()
        side_contents_divs_index += 1

        # volumes <div>
        volumes_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Volumes', volumes_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        volumes_span, self_volumes = volumes_div.contents
        self.__volumes = global_functions.make_counter(self_volumes.strip())
        side_contents_divs_index += 1

        # chapters <div>
        chapters_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Chapters',
                                                       chapters_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        chapters_span, self_chapters = chapters_div.contents
        self.__chapters = global_functions.make_counter(self_chapters.strip())
        side_contents_divs_index += 1

        # status <div>
        status_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Status', status_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        # The attribute briefly holds the raw node before being stripped.
        status_span, self.__status = status_div.contents
        self.__status = self.__status.strip()
        side_contents_divs_index += 1

        # published <div>
        published_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Published',
                                                       published_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        published_span, published = published_div.contents
        self.__start_time, self.__end_time = global_functions.make_start_and_end_time(
            published)
        side_contents_divs_index += 1

        # genres <div>
        # Maps genre name -> link target, added to the existing dict.
        genres_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Genres', genres_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        for genre_link in genres_div.findAll(name='a'):
            self.__genres[genre_link.text.strip()] = genre_link['href']
        side_contents_divs_index += 1

        # authors <div>
        # Maps author name -> link target, added to the existing dict.
        authors_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Authors', authors_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        for authors_link in authors_div.findAll(name='a'):
            self.__creators[authors_link.text.strip()] = authors_link['href']
        side_contents_divs_index += 1

        # One div is skipped here without being parsed.
        # NOTE(review): presumably the serialization entry - confirm.
        side_contents_divs_index += 1

        # score <div>
        # Only the first two children are used; anything after is ignored.
        score_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Score', score_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        score_span, self_score = score_div.contents[:2]
        self.__score = float(self_score)
        side_contents_divs_index += 1

        # rank <div>
        # The text must look like '#<number>'.
        rank_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Ranked', rank_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        rank_span, self_rank = rank_div.contents[:2]
        self_rank = self_rank.strip()
        if not self_rank.startswith("#"):
            raise exceptions.FailedToReloadError(self_rank)
        self.__rank = int(self_rank[1:])
        side_contents_divs_index += 1

        # popularity <div>
        # The text must look like '#<number>'.
        popularity_div = side_contents_divs[side_contents_divs_index]
        if not global_functions.check_side_content_div('Popularity',
                                                       popularity_div):
            raise exceptions.FailedToReloadError(content_wrapper_div)
        popularity_span, self_popularity = popularity_div.contents[:2]
        self_popularity = self_popularity.strip()
        if not self_popularity.startswith("#"):
            raise exceptions.FailedToReloadError(self_popularity)
        self.__popularity = int(self_popularity[1:])

        # Data from main content
        # Exactly two direct divs are expected; the second holds the table.
        main_content = contents[1]
        main_content_inner_divs = main_content.findAll(name='div',
                                                       recursive=False)
        if 2 != len(main_content_inner_divs):
            raise exceptions.FailedToReloadError(
                "Got len(main_content_inner_divs) == {0:d}".format(
                    len(main_content_inner_divs)))
        main_content_datas = main_content_inner_divs[1].table.tbody.findAll(
            name="tr", recursive=False)

        # First row is the synopsis, second row is everything else.
        synopsis_cell = main_content_datas[0]
        main_content_other_data = main_content_datas[1]

        # Getting synopsis
        synopsis_cell = synopsis_cell.td
        synopsis_cell_contents = synopsis_cell.contents
        if 'Synopsis' != synopsis_cell.h2.text.strip():
            raise exceptions.FailedToReloadError(synopsis_cell.h2.text.strip())
        # Joins the plain-text children (first and last child excluded) with
        # the platform line separator; tags in between are dropped.
        self.__synopsis = os.linesep.join([
            synopsis_cell_content.strip()
            for synopsis_cell_content in synopsis_cell_contents[1:-1]
            if isinstance(synopsis_cell_content, bs4.element.NavigableString)
        ])

        # Getting other data
        main_content_other_data = main_content_other_data.td
        other_data_kids = [i for i in main_content_other_data.children]

        # Getting all the data under 'Related Manga'
        index = 0
        index = global_functions.get_next_index(index, other_data_kids)
        if 'h2' == other_data_kids[index].name and\
           'Related Manga' == other_data_kids[index].text.strip():
            index += 1
            # Consumes relation entries until the element after the current
            # one is a <br>; make_set returns the index to continue from.
            while other_data_kids[index + 1].name != 'br':
                index = global_functions.make_set(
                    self.related_str_to_set_dict[
                        other_data_kids[index].strip()], index,
                    other_data_kids)
        else:
            # No 'Related Manga' heading was found at this position.
            # NOTE(review): the step back by 2 presumably lines the index up
            # for the get_next_index call below - confirm against that helper.
            index -= 2
        next_index = global_functions.get_next_index(index, other_data_kids)

        # Layout sanity checks, only performed in debug mode.
        if consts.DEBUG:
            if next_index - index != 2:
                raise exceptions.FailedToReloadError("{0:d} - {1:d}".format(
                    next_index, index))
            index = next_index + 1

            # Getting all the data under 'Characters & Voice Actors'
            if 'h2' != other_data_kids[index].name:
                raise exceptions.FailedToReloadError('h2 == {0:s}'.format(
                    other_data_kids[index].name))
            if 'Characters' != other_data_kids[index].contents[-1]:
                raise exceptions.FailedToReloadError(
                    other_data_kids[index].contents[-1])

        # The parent of the 'More reviews' text node carries the href.
        tag_for_reviews = main_content_other_data.find(
            text='More reviews').parent
        link_for_reviews = request.urljoin(consts.HOST_NAME,
                                           tag_for_reviews['href'])
        self.__parse_reviews(link_for_reviews)

        # Same lookup for the 'More recommendations' link.
        tag_for_recommendations = main_content_other_data.find(
            text='More recommendations').parent
        link_for_recommendations = request.urljoin(
            consts.HOST_NAME, tag_for_recommendations['href'])
        self.__parse_recommendations(link_for_recommendations)

        self._is_loaded = True