def __init__(self, div):
    """
    Parse one recommendation block.

    Stores the recommended anime in ``self.recommended_anime`` and a
    mapping of recommending account -> recommendation text in
    ``self.recommends``.

    :param div: The div of the recommendation to parse all the data from it.
    :type div: bs4.element.Tag
    :exception exceptions.FailedToReloadError: when the recommendations
        cell has neither 3 nor 5 direct child divs.
    """
    from pymal import account, anime

    # The row must hold exactly two cells: the recommended title and the
    # users' recommendations (the unpacking fails otherwise).
    anime_cell, recommends_cell = div.table.tbody.tr.findAll(
        name="td", recursive=False)

    anime_href = anime_cell.div.a["href"]
    self.recommended_anime = anime.Anime(int(anime_href.split('/')[2]))

    blocks = recommends_cell.findAll(name="div", recursive=False)
    block_count = len(blocks)
    if block_count == 3:
        # Only a single recommendation, held by the third div.
        entries = [blocks[2]]
    elif block_count == 5:
        # Third div is the first recommendation; the fifth div wraps the
        # remaining ones as its direct child divs.
        entries = [blocks[2]]
        entries = entries + blocks[4].findAll(name="div", recursive=False)
    else:
        raise exceptions.FailedToReloadError(
            "Unknown size of data: " + str(block_count))

    self.recommends = dict()
    for entry in entries:
        text_div, user_div = entry.findAll(name="div", recursive=False)
        profile_link = user_div.find(name='a', recursive=False)
        username = profile_link["href"].split('/')[2]
        self.recommends[account.Account(username)] = text_div.text
def my_reload(self):
    """
    Reloading data from MAL.

    Fetches the page at ``self.__my_mal_url`` through the account's
    authenticated connection and reads the user's own values (status,
    progress, score, tags, dates, priority, storage, downloaded chapters,
    reread data and comments) from the rows of the ``mangaForm`` form.
    The rows are consumed in order, starting from the third one.

    :exception exceptions.FailedToReloadError: when the page reports a bad
        result or an expected node is missing.
    """
    from pymal import global_functions

    def require(node, context):
        """Return ``node``; raise FailedToReloadError(context) if None."""
        if node is None:
            raise exceptions.FailedToReloadError(context)
        return node

    # Getting content wrapper <div>
    content_wrapper_div = global_functions.get_content_wrapper_div(
        self.__my_mal_url, self._account.auth_connect)

    bad_result = content_wrapper_div.find(name='div',
                                          attrs={'class': 'badresult'})
    if bad_result is not None:
        raise exceptions.FailedToReloadError(bad_result)

    # Getting content <td>
    content_div = require(
        content_wrapper_div.find(name="div", attrs={"id": "content"},
                                 recursive=False),
        content_wrapper_div)
    content_td = require(content_div.table.tr.td, content_div)

    # Getting content rows <tr>
    content_form = require(
        content_td.find(name="form", attrs={'id': "mangaForm"}),
        content_td)
    content_rows = content_form.table.tbody.findAll(name="tr",
                                                    recursive=False)

    def required_input(row, attrs):
        """Return the <input> matching ``attrs`` in ``row`` or raise."""
        return require(row.find(name="input", attrs=attrs), content_rows)

    def selected_option(row, select_attrs):
        """
        Return the selected <option> of the <select> matching
        ``select_attrs`` in ``row`` (None when no option is selected).
        Raises when the <select> itself is missing.
        """
        select = require(row.find(name="select", attrs=select_attrs),
                         content_rows)
        return select.find(name="option", attrs={"selected": ""})

    def date_string(row, prefix):
        """Concatenate the zero-padded month, day and year of a date row."""
        options = [selected_option(row, {"name": prefix + part})
                   for part in ("Month", "Day", "Year")]
        return "".join(str(option['value']).zfill(2) for option in options)

    contents_divs_index = 2

    # Getting my_status
    row = content_rows[contents_divs_index]
    status_select = require(
        row.find(name="select", attrs={"id": "status", "name": "status"}),
        content_rows)
    status_selected_options = [
        option for option in status_select.findAll(name="option")
        if 'selected' in option.attrs
    ]
    if 1 != len(status_selected_options):
        raise exceptions.FailedToReloadError(status_selected_options)
    self.__my_status = int(status_selected_options[0]['value'])

    # The rereading checkbox lives in the same row as the status.
    is_reread_node = required_input(row, {"id": "rereadingBox"})
    # NOTE(review): bool() of any non-empty string is True, so this
    # reflects whether the value attribute is non-empty rather than
    # whether the box is checked - confirm against the page markup.
    self.__my_is_rereading = bool(is_reread_node['value'])
    contents_divs_index += 1

    # Getting read volumes
    read_input = required_input(content_rows[contents_divs_index],
                                {"id": "vol_read", "name": "vol_read"})
    self.__my_completed_volumes = int(read_input['value'])
    contents_divs_index += 1

    # Getting read chapters
    read_input = required_input(content_rows[contents_divs_index],
                                {"id": "chap_read", "name": "chap_read"})
    self.__my_completed_chapters = int(read_input['value'])
    contents_divs_index += 1

    # Getting my_score
    score_selected_option = require(
        selected_option(content_rows[contents_divs_index],
                        {"name": "score"}),
        content_rows)
    self.__my_score = int(float(score_selected_option['value']))
    contents_divs_index += 1

    # Getting my_tags...
    tag_textarea = require(
        content_rows[contents_divs_index].find(name="textarea",
                                               attrs={"name": "tags"}),
        content_rows)
    # NOTE(review): splitting an empty text yields [''], so an empty
    # textarea produces a set holding one empty tag - confirm intended.
    self.__my_tags = frozenset(
        tag_textarea.text.split(self.__TAG_SEPARATOR))
    contents_divs_index += 1

    # Getting start date
    self.__my_start_date = date_string(content_rows[contents_divs_index],
                                       "start")
    contents_divs_index += 1

    # Getting end date
    self.__my_end_date = date_string(content_rows[contents_divs_index],
                                     "end")
    contents_divs_index += 1

    # Getting priority
    selected_priority_node = require(
        selected_option(content_rows[contents_divs_index],
                        {"name": "priority"}),
        content_rows)
    self.__my_priority = int(selected_priority_node['value'])
    contents_divs_index += 1

    # Getting storage (no selected option falls back to 0)
    selected_storage_type_node = selected_option(
        content_rows[contents_divs_index], {"id": "storageSel"})
    if selected_storage_type_node is None:
        self.__my_storage_type = 0
    else:
        self.__my_storage_type = int(selected_storage_type_node['value'])
    contents_divs_index += 1

    # Getting downloaded chapters
    downloaded_chapters_node = required_input(
        content_rows[contents_divs_index],
        {'id': "dChap", 'name': 'downloaded_chapters'})
    self.__my_downloaded_chapters = int(downloaded_chapters_node['value'])
    contents_divs_index += 1

    # Getting times reread
    times_reread_node = required_input(content_rows[contents_divs_index],
                                       {'name': 'times_read'})
    self.__my_times_reread = int(times_reread_node['value'])
    contents_divs_index += 1

    # Getting reread value (no selected option falls back to 0)
    reread_value_option = selected_option(
        content_rows[contents_divs_index], {'name': 'reread_value'})
    if reread_value_option is None:
        self.__my_reread_value = 0
    else:
        self.__my_reread_value = int(reread_value_option['value'])
    contents_divs_index += 1

    # Getting comments
    comment_textarea = require(
        content_rows[contents_divs_index].find(
            name="textarea", attrs={"name": "comments"}),
        content_rows)
    self.__my_comments = comment_textarea.text
    contents_divs_index += 1

    # Getting discuss flag
    # Only the presence of the input is verified; its value is not stored.
    required_input(content_rows[contents_divs_index], {"name": "discuss"})

    self._is_my_loaded = True
def reload(self):
    """
    Reloading the manga's data from the page at ``self.__mal_url``.

    Reads the title, the side-panel fields (image, alternative titles,
    type, volumes, chapters, status, published dates, genres, authors,
    score, rank, popularity), the synopsis and the related manga, then
    follows the 'More reviews' and 'More recommendations' links.

    :exception exceptions.FailedToReloadError: when failed.
    """
    import os
    from pymal import exceptions

    # Getting content wrapper <div>
    content_wrapper_div = global_functions.get_content_wrapper_div(
        self.__mal_url, global_functions.connect)

    # Getting title <div>
    self.__title = content_wrapper_div.h1.contents[1].strip()

    # Getting content <div>
    content_div = content_wrapper_div.find(
        name="div", attrs={"id": "content"}, recursive=False)
    if content_div is None:
        raise exceptions.FailedToReloadError(content_wrapper_div)

    content_table = content_div.table
    contents = content_table.tbody.tr.findAll(name="td", recursive=False)

    # Data from side content
    side_content = contents[0]
    side_contents_divs = side_content.findAll(name="div", recursive=False)

    # Getting manga image url <img>
    img_div = side_contents_divs[0]
    img_link = img_div.find(name="a")
    if img_link is None:
        raise exceptions.FailedToReloadError(content_wrapper_div)
    self.__image_url = img_link.img['src']

    side_contents_divs_index = 3

    # The three alternative-title divs are optional: the index only
    # advances when the div at the current position carries that label.

    # english <div>
    english_div = side_contents_divs[side_contents_divs_index]
    if global_functions.check_side_content_div('English', english_div):
        _, self_english = english_div.contents
        self.__english = self_english.strip()
        side_contents_divs_index += 1
    else:
        self.__english = ''

    # synonyms <div>
    synonyms_div = side_contents_divs[side_contents_divs_index]
    if global_functions.check_side_content_div('Synonyms', synonyms_div):
        _, self_synonyms = synonyms_div.contents
        self.__synonyms = self_synonyms.strip()
        side_contents_divs_index += 1
    else:
        self.__synonyms = ''

    # japanese <div>
    japanese_div = side_contents_divs[side_contents_divs_index]
    if global_functions.check_side_content_div('Japanese', japanese_div):
        _, self_japanese = japanese_div.contents
        self.__japanese = self_japanese.strip()
        side_contents_divs_index += 1
    else:
        self.__japanese = ''

    # From here on every div is mandatory and must appear in this order.

    # type <div>
    type_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Type', type_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_type = type_div.contents
    self.__type = self_type.strip()
    side_contents_divs_index += 1

    # volumes <div>
    volumes_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Volumes', volumes_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_volumes = volumes_div.contents
    self.__volumes = global_functions.make_counter(self_volumes.strip())
    side_contents_divs_index += 1

    # chapters <div>
    chapters_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Chapters',
                                                   chapters_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_chapters = chapters_div.contents
    self.__chapters = global_functions.make_counter(self_chapters.strip())
    side_contents_divs_index += 1

    # status <div>
    status_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Status', status_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_status = status_div.contents
    self.__status = self_status.strip()
    side_contents_divs_index += 1

    # published <div>
    published_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Published',
                                                   published_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, published = published_div.contents
    self.__start_time, self.__end_time = \
        global_functions.make_start_and_end_time(published)
    side_contents_divs_index += 1

    # genres <div>
    # NOTE(review): entries are added to the existing mapping without
    # clearing it first, so values from an earlier load are kept - confirm.
    genres_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Genres', genres_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    for genre_link in genres_div.findAll(name='a'):
        self.__genres[genre_link.text.strip()] = genre_link['href']
    side_contents_divs_index += 1

    # authors <div>
    # NOTE(review): same accumulation behaviour as the genres mapping.
    authors_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Authors', authors_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    for authors_link in authors_div.findAll(name='a'):
        self.__creators[authors_link.text.strip()] = authors_link['href']
    side_contents_divs_index += 1

    # One div between authors and score is skipped without being read.
    # NOTE(review): which div this is cannot be told from here - confirm.
    side_contents_divs_index += 1

    # score <div>
    score_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Score', score_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_score = score_div.contents[:2]
    self.__score = float(self_score)
    side_contents_divs_index += 1

    # rank <div>
    rank_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Ranked', rank_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_rank = rank_div.contents[:2]
    self_rank = self_rank.strip()
    if not self_rank.startswith("#"):
        raise exceptions.FailedToReloadError(self_rank)
    self.__rank = int(self_rank[1:])
    side_contents_divs_index += 1

    # popularity <div>
    popularity_div = side_contents_divs[side_contents_divs_index]
    if not global_functions.check_side_content_div('Popularity',
                                                   popularity_div):
        raise exceptions.FailedToReloadError(content_wrapper_div)
    _, self_popularity = popularity_div.contents[:2]
    self_popularity = self_popularity.strip()
    if not self_popularity.startswith("#"):
        raise exceptions.FailedToReloadError(self_popularity)
    self.__popularity = int(self_popularity[1:])

    # Data from main content
    main_content = contents[1]
    main_content_inner_divs = main_content.findAll(name='div',
                                                   recursive=False)
    if 2 != len(main_content_inner_divs):
        raise exceptions.FailedToReloadError(
            "Got len(main_content_inner_divs) == {0:d}".format(
                len(main_content_inner_divs)))
    main_content_datas = main_content_inner_divs[1].table.tbody.findAll(
        name="tr", recursive=False)

    synopsis_cell = main_content_datas[0]
    main_content_other_data = main_content_datas[1]

    # Getting synopsis
    synopsis_cell = synopsis_cell.td
    synopsis_cell_contents = synopsis_cell.contents
    if 'Synopsis' != synopsis_cell.h2.text.strip():
        raise exceptions.FailedToReloadError(synopsis_cell.h2.text.strip())
    # Only the plain-text children are kept; the first and last children
    # of the cell are left out.
    self.__synopsis = os.linesep.join([
        synopsis_cell_content.strip()
        for synopsis_cell_content in synopsis_cell_contents[1:-1]
        if isinstance(synopsis_cell_content, bs4.element.NavigableString)
    ])

    # Getting other data
    main_content_other_data = main_content_other_data.td
    other_data_kids = list(main_content_other_data.children)

    # Getting all the data under 'Related Manga'
    index = 0
    index = global_functions.get_next_index(index, other_data_kids)
    if 'h2' == other_data_kids[index].name and \
            'Related Manga' == other_data_kids[index].text.strip():
        index += 1
        while other_data_kids[index + 1].name != 'br':
            index = global_functions.make_set(
                self.related_str_to_set_dict[
                    other_data_kids[index].strip()],
                index, other_data_kids)
    else:
        # No 'Related Manga' section: step back before searching again.
        index -= 2
    next_index = global_functions.get_next_index(index, other_data_kids)

    if consts.DEBUG:
        if next_index - index != 2:
            raise exceptions.FailedToReloadError("{0:d} - {1:d}".format(
                next_index, index))
    index = next_index + 1

    # Getting all the data under 'Characters & Voice Actors'
    if 'h2' != other_data_kids[index].name:
        raise exceptions.FailedToReloadError('h2 == {0:s}'.format(
            other_data_kids[index].name))
    if 'Characters' != other_data_kids[index].contents[-1]:
        raise exceptions.FailedToReloadError(
            other_data_kids[index].contents[-1])

    # find() returns None when the text is absent; report that as the
    # documented reload failure instead of an AttributeError on .parent.
    reviews_text = main_content_other_data.find(text='More reviews')
    if reviews_text is None:
        raise exceptions.FailedToReloadError(main_content_other_data)
    tag_for_reviews = reviews_text.parent
    link_for_reviews = request.urljoin(consts.HOST_NAME,
                                       tag_for_reviews['href'])
    self.__parse_reviews(link_for_reviews)

    recommendations_text = main_content_other_data.find(
        text='More recommendations')
    if recommendations_text is None:
        raise exceptions.FailedToReloadError(main_content_other_data)
    tag_for_recommendations = recommendations_text.parent
    link_for_recommendations = request.urljoin(
        consts.HOST_NAME, tag_for_recommendations['href'])
    self.__parse_recommendations(link_for_recommendations)

    self._is_loaded = True