def __init__(self):
    """Announce the target domain and wire up the scraping collaborators.

    NOTE(review): fragment of a dubizzle extractor class — relies on a
    class-level DOMAIN attribute defined on the enclosing class.
    """
    # Was collapsed onto one syntactically-invalid line; restored one
    # statement per line and converted the py2 print statement.
    print(self.DOMAIN)
    self.logger = Logger(name='dubizzle_data_log')
    self.err_logger = Logger(name='err_dubizzle_data_log')
    self.request_manager = RequestManager()
    self.source_code_manager = SourceCodeManager()
    self.generator = Generator()
    self.db = DatabaseManager()
class DataExtractor:
    """Extracts car-listing details (make, model, trim, price, phone, ...)
    from dubicars.com listing pages and stores them via DatabaseManager."""

    DOMAIN = 'dubicars.com'
    PROJECT_ID = 13
    PATH = 'phones/'

    def __init__(self):
        print(self.DOMAIN)
        self.logger = Logger(name='dubicars_data_log')
        self.err_logger = Logger(name='err_dubicars_data_log')
        self.request_manager = RequestManager()
        self.source_code_manager = SourceCodeManager()
        self.generator = Generator()
        self.db = DatabaseManager()
        # Known {'make': ..., 'trim': ...} rows used by the trim matchers.
        self.trim_list = self.db.get_trim_list()

    def extract_data(self, url_data):
        """Fetch one listing URL, parse its fields and insert them in the DB.

        url_data: dict with 'id' (URL-table PK), 'url' and 'listing_id'.
        """
        print(url_data)
        url_id = url_data['id']
        url = url_data['url']
        listing_id = url_data['listing_id']
        data = {}
        response = self.request_manager.take_get_request(url)
        source_code = response['source_code']
        parsed_code = self.source_code_manager.parse_code(source_code)
        # A 'sold' badge or a 404 means the listing is gone: deactivate it.
        expired = parsed_code.find('img', {'class': 'sold'})
        if expired is not None:
            self.db.set_url_inactive(url_id)
            self.err_logger.error("EXPIRED " + str(url_data))
            return
        elif response['status_code'] == 404:
            self.db.set_url_inactive(url_id)
            self.err_logger.error("404 " + str(url_data))
            return
        try:
            marka = self.__find_make(parsed_code)
            year = self.__find_year(parsed_code)
            kilometres = self.__find_km(parsed_code)
            color = self.__find_color(parsed_code)
            specs = self.__find_specs(parsed_code)
            price = self.__find_price(parsed_code)
            model = self.__find_model(parsed_code, make=marka)
            trim = self.__find_trim(parsed_code, marka=marka, model=model)
            if trim == 'Other':
                # 'Other' trims are not worth storing.
                self.db.set_url_processed(url_id)
                self.db.set_url_inactive(url_id)
                return
            phone = self.__find_phone(parsed_code)
        except Exception as exc:
            self.err_logger.error(str(exc) + str(url_data))
            self.db.set_url_processed(url_id)
            return
        try:
            data['year'] = int(year)
            data['price'] = int(price)
            data['kilometres'] = int(kilometres)
            data['color'] = color
            data['specs'] = specs
            data['trim'] = trim
            data['model'] = model
            data['make'] = marka
            data['phone'] = phone
            print(data)
        except Exception as exc:
            # BUG FIX: original did `str(exc) + url_data` (str + dict),
            # which raises TypeError inside the error handler itself.
            self.err_logger.error(str(exc) + str(url_data))
            self.db.set_url_processed(url_id)
            self.db.set_url_inactive(url_id)
            return
        self.db.insert_data(data=data, listing_id=listing_id, url=url,
                            source=self.DOMAIN)
        self.db.set_url_processed(url_id)

    def update_data(self, url_data):
        """Re-visit a stored listing: mark it sold/inactive, or refresh its
        price and days-on-market."""
        timestamp = generate_timestamp()
        url_id = url_data['id']
        listing_id = url_data['listing_id']
        print(listing_id)
        url = url_data['url']
        first_timestamp = url_data['timestamp']
        # Days between first sighting and now; sign/order kept exactly as in
        # the original implementation.
        time_dif = first_timestamp - datetime.strptime(timestamp,
                                                       "%Y.%m.%d:%H:%M:%S")
        time_dif = time_dif.days
        response = self.request_manager.take_get_request(url)
        source_code = response['source_code']
        parsed_code = self.source_code_manager.parse_code(source_code)
        expired = parsed_code.find('img', {'class': 'sold'})
        if expired is not None:
            self.db.set_sold_status(listing_id=listing_id,
                                    days_for_selling=time_dif)
            self.db.set_url_inactive(url_id)
            return
        elif response['status_code'] == 404:
            print(404, listing_id)
            self.db.set_sold_status(listing_id=listing_id,
                                    days_for_selling=time_dif)
            self.db.set_url_inactive(url_id)
            return
        try:
            price = self.__find_price(parsed_code)
        except Exception:
            price = 0
        self.db.update_listing(listing_id=listing_id, price=int(price),
                               days_on_market=time_dif)
        self.db.set_updated(listing_id=listing_id)

    def __find_make(self, code):
        # The 'Make:' label's sibling holds the manufacturer name.
        try:
            return self.__find_tag_by_text(code, text='Make:')
        except Exception:
            return ''

    def __find_year(self, code):
        # First integer token of the 'Year:' field; '' when the field is
        # missing (and None when no token parses, as in the original).
        try:
            year = self.__find_tag_by_text(code, text='Year:')
            for token in year.split():
                try:
                    return int(token)
                except ValueError:
                    continue
        except Exception:
            return ''

    def __find_km(self, code):
        # Mileage with thousands separators stripped; 0 when unparsable.
        try:
            km = self.__find_tag_by_text(code, text='Kilometers:')
            km = km.replace(",", "").replace(".", "").replace(" ", "")
            return int(km)
        except Exception:
            return 0

    def __find_color(self, code):
        try:
            return self.__find_tag_by_text(code, text='Color:').strip()
        except Exception:
            return ''

    def __find_specs(self, code):
        try:
            return self.__find_tag_by_text(code, text='Specs:').strip()
        except Exception:
            return ''

    # ============= TRIM ===============
    def __generateEditedTrims(self, marka, trim):
        """Match known trims whose canonical spelling contains '-' against a
        listing trim that spells them with spaces (e.g. 'GT R' vs 'GT-R').

        Returns the canonical trim string, or '' when nothing matches.
        """
        for example_trim in self.trim_list:
            try:
                # Only worth doing for trims longer than 3 characters.
                if len(example_trim['trim']) <= 3:
                    continue
            except Exception:
                continue
            if '-' in example_trim['trim']:
                if example_trim['make'] == marka:
                    dashless = example_trim['trim'].replace('-', ' ')
                    if dashless in trim:
                        print(example_trim['trim'])
                        return example_trim['trim']
                    # Retry with title-case, e.g. 'Gt R'.
                    if dashless.title() in trim:
                        print(example_trim['trim'])
                        return example_trim['trim']
        return ''

    def __find_trim(self, code, marka, model):
        """Strip the model name out of the 'Model:' field and match the
        remainder against the known trim list; '' on any parsing failure.

        NOTE(review): reconstructed from whitespace-mangled source; the
        short-trim `continue` nesting is the most plausible reading.
        """
        try:
            to_return_trim = ''
            not_edited_trim = self.__find_tag_by_text(code,
                                                      text='Model:').strip()
            trim = not_edited_trim.replace(model, '').strip()
            if len(trim.split()) == 0:
                print(not_edited_trim, 'there is no Trim!!!!')
                return not_edited_trim.strip()
            for example_trim in self.trim_list:
                if example_trim['make'] == marka:
                    if example_trim['trim'] in trim:
                        if len(example_trim['trim']) <= 2:
                            # Very short trims must match as a whole word.
                            if (' ' + example_trim['trim'] + ' '
                                    in ' ' + trim + ' '):
                                if (len(example_trim['trim'])
                                        > len(to_return_trim)):
                                    print(example_trim['trim'])
                                    to_return_trim = example_trim['trim']
                            continue
                        # Prefer the longest matching trim.
                        if len(example_trim['trim']) > len(to_return_trim):
                            print(example_trim['trim'])
                            to_return_trim = example_trim['trim']
            edited_trim = self.__generateEditedTrims(marka=marka, trim=trim)
            if len(edited_trim) > len(to_return_trim):
                return edited_trim
            elif to_return_trim == '':
                # No known trim matched: accept a short free-form trim.
                if len(trim.split()) <= 2 and len(trim.split()) > 0:
                    return trim
            else:
                return to_return_trim
        except Exception:
            return ''
    # ============= TRIM ===============

    def __find_model(self, code, make):
        # Last breadcrumb is "<make> <model>"; drop the make's tokens.
        try:
            breadcrumbs = code.findAll('span', {'typeof': 'v:Breadcrumb'})
            name = breadcrumbs[-1].text
            len_make = len(make.split())
            model = ' '.join(name.split()[len_make:])
            return model.strip()
        except Exception as exc:
            print(exc)
            return ''

    def __find_phone(self, code):
        # Phone number hidden in the contact button's data-reveal attribute.
        try:
            phone = code.find('p', {
                'id': 'contact-buttons'
            }).find('a')['data-reveal']
            phone = (phone.replace('"', "").replace(" ", "")
                     .replace("[", "").replace("]", ""))
            return phone.strip()
        except Exception as exc:
            print(exc)
            return ''

    def __find_price(self, code):
        # Regular price first, then the 'reduced' variant; 0 when absent.
        try:
            price = code.find('strong', {'class': 'money'}).text
            price = (price.replace('AED', "").replace(" ", "")
                     .replace(",", "").replace(".", "").replace("-", ""))
            return int(price)
        except Exception:
            try:
                price = code.find('strong', {'class': 'money reduced'}).text
                price = (price.replace('AED', "").replace(" ", "")
                         .replace(",", "").replace(".", "").replace("-", ""))
                return int(price)
            except Exception:
                return 0

    def __find_tag_by_text(self, code, text):
        # Locate the label node containing `text` and return the text of the
        # label's next sibling (the field value).
        tag_with_text = code.find(text=text)
        needed_tag = tag_with_text.parent.find_next_sibling()
        return needed_tag.text
class DataExtractor:
    """Extracts car-listing details from dubai.dubizzle.com pages and stores
    them via DatabaseManager; phone numbers are OCR'd from an inline image."""

    DOMAIN = 'dubai.dubizzle.com'
    PROJECT_ID = 13
    PATH = 'phones/'  # scratch directory for phone-number images

    def __init__(self):
        print(self.DOMAIN)
        self.logger = Logger(name='dubizzle_data_log')
        self.err_logger = Logger(name='err_dubizzle_data_log')
        self.request_manager = RequestManager()
        self.source_code_manager = SourceCodeManager()
        self.generator = Generator()
        self.db = DatabaseManager()

    def extract_data(self, url_data):
        """Fetch one listing URL, parse its fields and insert them in the DB.

        url_data: dict with 'id' (URL-table PK), 'url' and 'listing_id'.
        """
        print(url_data)
        url_id = url_data['id']
        url = url_data['url']
        listing_id = url_data['listing_id']
        data = {}
        response = self.request_manager.take_get_request(url)
        source_code = response['source_code']
        parsed_code = self.source_code_manager.parse_code(source_code)
        # Expired ad banner or 404 means the listing is gone.
        expired = parsed_code.find('div', {'id': 'expired-ad-message'})
        if expired is not None:
            self.db.set_url_inactive(url_id)
            self.err_logger.error("EXPIRED " + str(url_data))
            return
        elif response['status_code'] == 404:
            self.err_logger.error("404 " + str(url_data))
            self.db.set_url_inactive(url_id)
            return
        # Breadcrumb divs: [-2] is the make, [-1] the model.
        bread = parsed_code.find('span', {'id': 'browse_in_breadcrumb'})
        items = bread.findAll('div')
        try:
            year = parsed_code.find('img', attrs={
                'alt': 'Year'
            }).parent.text.replace('Year', '').strip()
            kilometres = (parsed_code.find('img', attrs={
                'alt': 'Kilometers'
            }).parent.text.replace('Kilometers', '').strip()
                .replace(',', '').replace('.', ''))
            color = parsed_code.find('img', attrs={
                'alt': 'Color'
            }).parent.text.replace('Color', '').strip()
            specs = parsed_code.find('img', attrs={
                'alt': 'Specs'
            }).parent.text.replace('Specs', '').strip()
            trim = parsed_code.find('img', attrs={
                'alt': 'Trim'
            }).parent.parent.text.replace('Trim', '').strip()
            if trim == 'Other':
                # 'Other' trims are not worth storing.
                self.db.set_url_processed(url_id)
                return
            price = parsed_code.find('span', {
                'id': 'actualprice'
            }).text.replace(',', '').replace('.', '')
            model = items[-1].find('a').text.strip()
            marka = items[-2].find('a').text.strip()
            phone = self.extract_phone(parsed_code, id=url_id)
        except Exception as exc:
            self.err_logger.error(str(exc) + str(url_data))
            self.db.set_url_processed(url_id)
            return
        try:
            data['year'] = int(year)
            data['price'] = int(price)
            data['kilometres'] = int(kilometres)
            data['color'] = color
            data['specs'] = specs
            data['trim'] = trim
            data['model'] = model
            data['make'] = marka
            data['phone'] = phone
        except Exception as exc:
            # ROBUSTNESS FIX: the int() conversions were unguarded, so one
            # malformed listing crashed the whole run (the dubicars extractor
            # in this file already guards them the same way).
            self.err_logger.error(str(exc) + str(url_data))
            self.db.set_url_processed(url_id)
            return
        self.db.insert_data(data=data, listing_id=listing_id, url=url,
                            source=self.DOMAIN)
        self.db.set_url_processed(url_id)

    def update_data(self, url_data):
        """Re-visit a stored listing: mark it sold/inactive, or refresh its
        price and days-on-market."""
        timestamp = generate_timestamp()
        url_id = url_data['id']
        listing_id = url_data['listing_id']
        print(listing_id)
        url = url_data['url']
        first_timestamp = url_data['timestamp']
        # Days between first sighting and now; sign/order kept exactly as in
        # the original implementation.
        time_dif = first_timestamp - datetime.strptime(timestamp,
                                                       "%Y.%m.%d:%H:%M:%S")
        time_dif = time_dif.days
        response = self.request_manager.take_get_request(url)
        source_code = response['source_code']
        parsed_code = self.source_code_manager.parse_code(source_code)
        expired = parsed_code.find('div', {'id': 'expired-ad-message'})
        if expired is not None:
            self.db.set_sold_status(listing_id=listing_id,
                                    days_for_selling=time_dif)
            self.db.set_url_inactive(url_id)
            print("updated")
            return
        elif response['status_code'] == 404:
            print(404, listing_id)
            self.db.set_sold_status(listing_id=listing_id,
                                    days_for_selling=time_dif)
            self.db.set_url_inactive(url_id)
            print("updated")
            return
        try:
            price = parsed_code.find('span', {
                'id': 'actualprice'
            }).text.replace(',', '').replace('.', '')
        except Exception:
            price = 0
        self.db.update_listing(listing_id=listing_id, price=int(price),
                               days_on_market=time_dif)
        self.db.set_updated(listing_id=listing_id)
        print("updated")

    def extract_phone(self, code, id):
        """Decode the base64 phone-number image, OCR it with textract and
        return the number normalised to a '+971' prefix.

        `id` is used to name the temporary image file (name kept for
        backward compatibility even though it shadows the builtin).
        """
        img = code.find('img', {'class': 'phone-num-img'})['src']
        # data URI looks like 'data:image/<ext>;base64,<payload>'.
        ext = img.partition('data:image/')[2].split(';')[0]
        image_path = self.PATH + str(id) + '.' + ext
        with open(image_path, 'wb') as f:
            f.write(ba.a2b_base64(img.partition('base64,')[2]))
        text = textract.process(image_path).replace(' ', '')
        if '+971' not in text:
            text = '+971' + text
        os.remove(image_path)
        return text.strip()
def __init__(self):
    """Set up the scraper with its own reddit request manager."""
    self._request_manager = RequestManager()
class CommentScraper:
    """Scrapes full comment trees from old.reddit.com post pages, including
    'load more comments' stubs via the morechildren API."""

    def __init__(self):
        self._request_manager = RequestManager()

    def scrape_comments(self, url, sort_by):
        """Return parsed comment objects for a post URL (www -> old host,
        which serves the parseable legacy markup)."""
        if url.startswith("https://www"):
            url = url.replace("www", "old", 1)
        soup = self._request_manager.get_reddit_soup(url)
        return self._parse_comments_from_document(soup)

    def _parse_comments_from_document(self, document, get_children=False):
        """Parse every top-level comment in `document`.

        get_children=True means `document` is a permalink page whose first
        comment is the parent itself, so parsing starts at index 1.
        """
        comment_objects_list = []
        try:
            container = document.find_all("div", class_=["nestedlisting"])[0]
        except IndexError:
            return comment_objects_list
        container_comments = container.find_all("div", class_="comment")
        if get_children and len(container_comments) == 1:
            # Only the parent comment itself: nothing to collect.
            return comment_objects_list
        # Reuse the list already fetched instead of re-querying (original
        # called find_all a second time for the same result).
        first_comment = container_comments[1 if get_children else 0]
        comment_objects_list.append(self._extract_comment_data(first_comment))
        for sibling in first_comment.next_siblings:
            # BUG FIX: the original evaluated sibling["class"] before using
            # the isinstance(sibling, Tag) result, so a NavigableString
            # sibling (e.g. whitespace) raised TypeError; a class-less tag
            # raised KeyError. Guard first, then read the class list safely.
            if not isinstance(sibling, Tag):
                continue
            classes = sibling.get("class") or []
            if "comment" in classes:
                comment_objects_list.append(
                    self._extract_comment_data(sibling))
            elif "morechildren" in classes:
                # Canonical link path: /r/<subreddit>/comments/...
                subreddit = (document.find(
                    "link", {"rel": "canonical"})["href"].split("/")[4])
                comment_objects_list.extend(
                    self._get_more_comments(sibling, subreddit))
        return comment_objects_list

    def _extract_comment_data(self, comment_tag, recursive=True):
        """Build a dict for one comment; when recursive and the comment has
        children, fetch its permalink page and attach parsed 'replies'."""
        top_level_comment_object = {}
        score_tag = comment_tag.find("span", class_="score unvoted")
        score = score_tag["title"] if score_tag is not None else "???"
        author_tag = comment_tag.find("a", class_="author")
        author = (author_tag.text.strip()
                  if author_tag is not None else "[deleted]")
        date_posted = comment_tag.find("time", class_="live-timestamp")
        date_posted_timestamp = date_posted["datetime"]
        date_posted_readable = date_posted["title"]
        date_edited = comment_tag.find("time", class_="edited-timestamp")
        date_edited_timestamp = (date_edited["datetime"]
                                 if date_edited is not None else None)
        # "(3 children)" -> 3
        num_children = int(
            comment_tag.find("a", class_="numchildren").text.strip().replace(
                "(", "").replace(")", "").split(" ")[0])
        permalink_old = comment_tag.find("a", class_="bylink")["href"]
        permalink = permalink_old.replace("old", "www", 1)
        comment_container = comment_tag.find(
            "div", class_="usertext-body may-blank-within md-container").find(
                "div", class_="md")
        comment_formatted = comment_container.prettify()
        comment_raw = " ".join([
            p.text for p in comment_container.find_all("p")
        ]).strip().rstrip()
        top_level_comment_object["score"] = score
        top_level_comment_object["author"] = author
        top_level_comment_object["date_posted_timestamp"] = (
            date_posted_timestamp)
        top_level_comment_object["date_posted_readable"] = date_posted_readable
        top_level_comment_object["date_edited_timestamp"] = (
            date_edited_timestamp)
        top_level_comment_object["num_children"] = num_children
        top_level_comment_object["permalink_old"] = permalink_old
        top_level_comment_object["permalink"] = permalink
        top_level_comment_object["comment_formatted"] = comment_formatted
        top_level_comment_object["comment_raw"] = comment_raw
        if num_children == 0 or not recursive:
            return top_level_comment_object
        # Fetch the comment's own page to collect its reply subtree.
        nested_soup = self._request_manager.get_reddit_soup(permalink_old)
        parsed_replies = self._parse_comments_from_document(nested_soup, True)
        if len(parsed_replies) == 0:
            return top_level_comment_object
        top_level_comment_object["replies"] = parsed_replies
        return top_level_comment_object

    def _get_more_comments(self, morecomment_tag, subreddit):
        """Resolve a 'load more comments' stub via the morechildren API and
        return the extracted comment dicts (recursing into nested stubs)."""
        # onclick="return morechildren(this, 't3_..', 'confidence', '..,..')"
        morecomments_args = (morecomment_tag.a["onclick"].replace(
            "return morechildren", "").replace("(", "").replace(
                ")", "").replace("'", "").split(","))
        data_id = morecomment_tag["data-fullname"]
        link_id = morecomments_args[1].strip()
        sort = morecomments_args[2].strip()
        renderstyle = "html"
        limit_children = False
        r = subreddit
        children = (",".join(
            morecomments_args[3:len(morecomments_args) - 1]).strip())
        payload = {
            "id": data_id,
            "link_id": link_id,
            "sort": sort,
            "renderstyle": renderstyle,
            "limit_children": limit_children,
            "r": r,
            "children": children
        }
        more_soup = self._request_manager.post_reddit_soup(
            "https://old.reddit.com/api/morechildren", payload)
        json_comments = json.loads(more_soup.prettify())
        # presumably jquery[10][3][0] is where reddit nests the comment
        # payload in this legacy API response — verify against a live call
        json_comments_list = json_comments["jquery"][10][3][0]
        more_comments = []
        for comment in json_comments_list:
            comment_content = comment["data"]["content"]
            comment_tag_string = html.unescape(comment_content)
            comment_tag_soup = BeautifulSoup(comment_tag_string, "html.parser")
            if comment["kind"] == "more":
                more_comments.extend(
                    self._get_more_comments(
                        comment_tag_soup.find("div", class_="morechildren"),
                        subreddit))
            else:
                more_comments.append(
                    self._extract_comment_data(comment_tag_soup))
        return more_comments
def __init__(self):
    """Wire up the scraping collaborators this extractor depends on."""
    # Was collapsed onto one syntactically-invalid line (multiple statements
    # with no separators); restored one statement per line.
    self.request_manager = RequestManager()
    self.source_code_manager = SourceCodeManager()
    self.generator = Generator()
    self.db = DatabaseManager()
class PostScraper:
    """Scrapes post listings from reddit, paging until a requested number of
    posts has been collected."""

    def __init__(self):
        self._request_manager = RequestManager()

    def _get_posts_from_first_soup(self, first_soup, limit):
        """Extract post models embedded in the page's `script#data` JSON.

        Returns at most `limit` non-stickied, non-crosspost subreddit posts;
        [] when the script tag is absent.
        """
        try:
            script_data = first_soup.select('script#data')
            script_data_content = json.dumps(script_data[0].contents[0])
            script_data_content = (script_data_content.replace(
                "window.___r = ", ""))
            script_data_content = json.loads(script_data_content)
            # Drop the trailing ';' before parsing the payload itself.
            script_data_content = script_data_content[:len(
                script_data_content) - 1]
            script_data_dictionary = json.loads(script_data_content)
            script_data_list = list(
                script_data_dictionary["posts"]["models"].values())
            filtered_list = [
                post for post in script_data_list
                if post["belongsTo"]["type"] == "subreddit"
                and not post["isStickied"]
                and post["crosspostParentId"] is None
            ]
            # Slicing already handles lists shorter than limit.
            return filtered_list[:limit]
        except IndexError:
            return []

    def _get_posts_after_first_soup(self, soup, limit):
        """Extract posts from a JSON continuation response (pages after the
        first); same filtering and limit rules as the first page."""
        post_list = list(json.loads(soup.text)["posts"].values())
        filtered_list = [
            post for post in post_list
            if post["belongsTo"]["type"] == "subreddit"
            and not post["isStickied"]
            and post["crosspostParentId"] is None
        ]
        return filtered_list[:limit]

    def _get_processed_posts(self, posts, return_keys=None, verbose=False):
        """Project raw post dicts onto output objects.

        return_keys: optional key subset to copy verbatim (BUG FIX: was a
            mutable default `[]`). When empty/None and verbose is False, a
            standard summary set of keys is used; verbose=True copies the
            whole post dict.
        Returns (post_objects, post_ids).
        """
        if return_keys is None:
            return_keys = []
        post_objects = []
        post_ids = []
        for value in posts:
            post_object = {}
            if len(return_keys) > 0:
                for return_key in return_keys:
                    post_object[return_key] = value[return_key]
            elif verbose:
                post_object = value
            else:
                post_object["id"] = value["id"]
                post_object["title"] = value["title"]
                post_object["numComments"] = value["numComments"]
                post_object["created"] = value["created"]
                post_object["score"] = value["score"]
                post_object["author"] = value["author"]
                post_object["upvoteRatio"] = value["upvoteRatio"]
                post_object["permalink"] = value["permalink"]
                post_object["media"] = value["media"]
            post_ids.append(value["id"])
            post_objects.append(post_object)
        return post_objects, post_ids

    def scrape_posts(self, subreddit, limit, sort_by, verbose):
        """Collect up to `limit` posts, paging with the last seen post id.

        subreddit may be None/'' for the front page.
        """
        post_objects_list = []
        post_ids_list = []
        posts_count = 0
        subreddit_entered = subreddit is not None and len(subreddit) > 0
        while posts_count < limit:
            if posts_count == 0:
                url = BASE_URL
                if subreddit_entered:
                    url += "/r/{sub_name}"
                    url = url.format(sub_name=subreddit)
                url += "/{sort_by}"
                url = url.format(sort_by=sort_by)
                subreddit_post_soup = self._request_manager.get_reddit_soup(
                    url)
                posts = self._get_posts_from_first_soup(
                    subreddit_post_soup, limit)
            else:
                remaining_limit = limit - posts_count
                if subreddit_entered:
                    url = URL_AFTER_ID.format(sub_name=subreddit,
                                              last_id=post_ids_list[-1],
                                              sort_by=sort_by)
                else:
                    url = BASE_URL + "/{sort_by}/?after={last_id}"
                    url = url.format(sort_by=sort_by,
                                     last_id=post_ids_list[-1])
                subreddit_post_soup = self._request_manager.get_reddit_soup(
                    url)
                posts = (self._get_posts_after_first_soup(
                    subreddit_post_soup, remaining_limit)
                         if subreddit_entered else
                         self._get_posts_from_first_soup(
                             subreddit_post_soup, remaining_limit))
            post_objects, post_ids = self._get_processed_posts(
                posts, return_keys=[], verbose=verbose)
            post_objects_list.extend(post_objects)
            post_ids_list.extend(post_ids)
            posts_count = len(post_objects_list)
            # BUG FIX: the original only broke when the FIRST page was empty
            # (posts_count == 0); an empty later page with posts_count still
            # below limit looped forever. Stop whenever a page adds nothing.
            if len(post_objects) == 0:
                break
        return post_objects_list
class LinksExtractor:
    """Walks dubicars.com search-result pages and stores listing links
    via DatabaseManager."""

    DOMAIN = 'dubicars.com'
    PROJECT_ID = 13

    def __init__(self):
        self.request_manager = RequestManager()
        self.source_code_manager = SourceCodeManager()
        self.generator = Generator()
        self.db = DatabaseManager()

    def __createUrl(self, templateUrl, page):
        # templateUrl is a format string with one positional slot for the
        # page number.
        return templateUrl.format(page)

    def findLinks(self, sourceCode):
        """Collect {'url', 'listing_id'} dicts from one search-results page.

        Listings whose km value parses below 100 are skipped (missing or
        unparsable km defaults to 101, i.e. kept).
        """
        links = []
        status = True
        sourceCode = sourceCode.find('section', {'data-item-hash': "search"})
        listOfTags = sourceCode.findAll('li')
        for block in listOfTags:
            try:
                data = block['data-sp-item']
            except Exception:
                # Not a listing <li>; ignore.
                continue
            data = json.loads(data)
            listing_id = data['id']
            try:
                km = int(data['km'])
            except Exception:
                km = 101
            if km < 100:
                continue
            tag_a = block.find('a')
            href = tag_a['href']
            links.append({'url': href, 'listing_id': listing_id})
        return {'links': links, 'status': status}

    def main(self, sourceUrl):
        """Page through results for `sourceUrl`, inserting links until the
        last page (or a request failure)."""
        page = 1
        while True:
            url = self.__createUrl(sourceUrl, page)
            print(url)
            try:
                response = self.request_manager.take_get_request(
                    url, proxy_using=False)
            except Exception as exc:
                print(exc)
                break
            parseSourceCode = self.source_code_manager.parse_code(
                response['source_code'])
            links_data = self.findLinks(parseSourceCode)
            links = links_data['links']
            self.db.insert_urls(urls_list=links, source=self.DOMAIN)
            if self.isLastPage(parseSourceCode):
                print("last")
                break
            page += 1

    def find_last_page(self, code):
        # Second-to-last pagination anchor carries the last page number.
        pagination = code.find('div', {'class': 'paging '})
        pages = pagination.findAll('a')
        last_page = int(pages[-2].text)
        print(last_page)
        return last_page

    def isLastPage(self, code):
        # No 'next' link means we are on the final results page.
        return code.find('a', {'class': 'next'}) is None