Пример #1
0
class SofifaTeamUrlsSpider(CrawlSpider):
    """Spider ``team_pages``: crawls sofifa.com from the national-teams listing.

    Every row of the listing table is loaded into a ``NationalTeamStats``
    item by :meth:`parse_start_url`.
    """

    name = 'team_pages'
    allowed_domains = ['sofifa.com']
    start_urls = ['https://sofifa.com/teams/national/']

    # A single rule: follow every link that matches none of the deny patterns
    # and hand each response to ``parse_start_url``. A second rule following
    # the "Next" pagination link exists in the history of this file but was
    # left disabled by the author.
    rules = (
        Rule(
            LinkExtractor(deny=[
                r'\?', r'/[0-9]+', r'/forgot', r'/shortlist', r'/authorize',
                r'/leagues', r'/squad', r'/help', r'/compare', r'/players',
                r'/teams',
            ]),
            callback='parse_start_url',
            follow=True,
        ),
    )

    custom_settings = sofifa_settings(
        name=name,
        proxies=proxies,
        user_agent=user_agent,
        collection='team_urls',
        validator='TeamItem',
    )

    def parse_start_url(self, response):
        """Yield one loaded ``NationalTeamStats`` item per listing-table row.

        The same XPath feeds ``id``/``team_page`` and ``hits``/``comments``;
        presumably the item's field processors pull the distinct values out
        of the shared text (not visible from this class).
        """
        team_link_xpath = ".//a[contains(@href, 'team/')]/@href"
        comments_cell_xpath = (
            ".//div[@class='col-comments text-right text-ellipsis rtl']/text()"
        )

        # Ordered exactly as the fields are fed to the loader.
        field_xpaths = (
            ('id', team_link_xpath),
            ('nationality', ".//a[contains(@href, 'teams?na')]/text()"),
            ('region', ".//a[contains(@href, 'teams?ct')]/text()"),
            ('num_players',
             ".//td[@class='col text-center'][last()]/div/text()"),
            ('hits', comments_cell_xpath),
            ('comments', comments_cell_xpath),
            ('team_page', team_link_xpath),
        )

        table_rows = response.xpath(
            "//table[@class='table table-hover persist-area']/tbody/tr")

        for table_row in table_rows:
            row_loader = ItemLoader(item=NationalTeamStats(),
                                    selector=table_row,
                                    response=response)
            for field_name, field_xpath in field_xpaths:
                row_loader.add_xpath(field_name, field_xpath)

            yield row_loader.load_item()
class SofifaPlayerURLsSpider(CrawlSpider):
    """Spider ``player_pages``: crawls the sofifa.com player listing.

    Each row of the listing table is loaded into a ``MainPageItem``. Two
    rules drive the crawl: one follows links that match none of the deny
    patterns, the other follows the "Next" pagination link; both send the
    response to :meth:`parse_item`.
    """

    name = 'player_pages'

    allowed_domains = ['sofifa.com']
    start_urls = ['https://sofifa.com/players/']

    rules = (
        Rule(LinkExtractor(deny=[r'\?', r'[0-9]+/[0-9]+/', r'/changeLog', r'/live', r'/squads', r'/calculator/',
                                 r'/team/', r'[0-9]+', r'/[a-zA-Z0-9]+$']),
             callback='parse_item', follow=True),
        Rule(LinkExtractor(restrict_xpaths="//a[text()='Next']"), callback='parse_item', follow=True)
    )

    custom_settings = sofifa_settings(name=name, proxies=proxies, user_agent=user_agent, collection='player_urls',
                                      validator='PlayerItem')

    def parse_item(self, response):
        """Yield one loaded ``MainPageItem`` per row of the player table.

        The ``@``-prefixed lines below are Scrapy contracts. ``@scrapes``
        lists ``id`` because that is the field this callback populates.

        @url http://sofifa.com/players/
        @returns items 1 61
        @returns requests 0 0
        @scrapes id total_stats hits comments player_page
        """

        self.crawler.stats.set_value('page_counter', page_counter(response.url))

        # Both messages depend only on the response, so emit them once per
        # page instead of once per table row.
        self.logger.debug('User-Agent: %s', response.request.headers['User-Agent'])
        self.logger.info(f'Currently on page {current_page(response.url)}')

        # The same cell text feeds both 'hits' and 'comments'; presumably the
        # item's field processors separate the two values.
        comments_cell_xpath = ".//div[@class='col-comments text-right text-ellipsis rtl']/text()"
        player_link_xpath = ".//a[contains(@href, 'player/')]/@href"

        for row in response.xpath("//table[@class='table table-hover persist-area']/tbody/tr"):
            loader = ItemLoader(item=MainPageItem(), selector=row, response=response)

            loader.add_xpath('id', player_link_xpath)
            loader.add_xpath('total_stats', ".//div[@class='col-digit col-tt']/text()")
            loader.add_xpath('hits', comments_cell_xpath)
            loader.add_xpath('comments', comments_cell_xpath)
            loader.add_xpath('player_page', player_link_xpath)

            yield loader.load_item()
Пример #3
0
class SofifaPlayerPagesSpider(scrapy.Spider):
    """Spider ``player_details``: loads a ``SofifaItem`` from each player page.

    The pages to visit are the ``player_page`` values stored in the
    ``sofifa.player_urls`` MongoDB collection on ``localhost:27017``.
    """

    name = 'player_details'

    custom_settings = sofifa_settings(name=name,
                                      proxies=proxies,
                                      user_agent=user_agent,
                                      collection='player_details',
                                      validator='PlayerItem')

    # NOTE(review): the connection and the query below execute when this class
    # body is evaluated (i.e. when the module is imported), not when the crawl
    # starts. Kept as class attributes so existing references keep working.
    client = MongoClient('localhost', 27017)
    db = client.sofifa
    collection = db.player_urls

    # ``$exists`` takes a boolean; the string 'true' only worked because any
    # non-empty string is truthy.
    urls = [
        x["player_page"]
        for x in collection.find({'player_page': {
            '$exists': True
        }})
    ]

    # --- XPath building blocks used by parse() -----------------------------

    # Text node / <span> text that follows a <label> with the given caption.
    _LABEL_TEXT_XPATH = "(.//label[text()='{label}']/following::text())[1]"
    _LABEL_SPAN_XPATH = "(.//label[text()='{label}']/following::span/text())[1]"

    # Inline script that mentions 'var overallRating'; every summary stat is
    # loaded from this same script text.
    _RATING_SCRIPT_XPATH = (
        ".//div[@class='wrapper']//script[contains(text(), 'var overallRating')]/text()"
    )
    _GOALKEEPER_SUMMARY_FIELDS = ('DIV', 'HAN', 'KIC', 'REF', 'SPD', 'POS')
    _OUTFIELD_SUMMARY_FIELDS = ('PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY')

    # Detailed stats come in two markup flavours, selected per field below.
    _SPAN_STAT_XPATH = "(.//span[../span='{label}']/text())[1]"
    _LIST_STAT_XPATH = ".//li[contains(text(), '{label}')]/span/text()"

    # (item field, XPath template, label on the page) in loading order.
    _DETAILED_STATS = (
        ('crossing', _SPAN_STAT_XPATH, 'Crossing'),
        ('finishing', _SPAN_STAT_XPATH, 'Finishing'),
        ('heading_accuracy', _SPAN_STAT_XPATH, 'Heading Accuracy'),
        ('short_passing', _SPAN_STAT_XPATH, 'Short Passing'),
        ('volleys', _SPAN_STAT_XPATH, 'Volleys'),
        ('aggression', _SPAN_STAT_XPATH, 'Aggression'),
        ('interceptions', _SPAN_STAT_XPATH, 'Interceptions'),
        ('positioning', _SPAN_STAT_XPATH, 'Positioning'),
        ('vision', _SPAN_STAT_XPATH, 'Vision'),
        ('penalties', _SPAN_STAT_XPATH, 'Penalties'),
        ('composure', _LIST_STAT_XPATH, 'Composure'),
        ('dribbling', _SPAN_STAT_XPATH, 'Dribbling'),
        ('curve', _SPAN_STAT_XPATH, 'Curve'),
        ('fk_accuracy', _SPAN_STAT_XPATH, 'FK Accuracy'),
        ('long_passing', _SPAN_STAT_XPATH, 'Long Passing'),
        ('ball_control', _SPAN_STAT_XPATH, 'Ball Control'),
        ('marking', _SPAN_STAT_XPATH, 'Marking'),
        ('standing_tackle', _SPAN_STAT_XPATH, 'Standing Tackle'),
        ('sliding_tackle', _SPAN_STAT_XPATH, 'Sliding Tackle'),
        ('acceleration', _SPAN_STAT_XPATH, 'Acceleration'),
        ('sprint_speed', _SPAN_STAT_XPATH, 'Sprint Speed'),
        ('agility', _SPAN_STAT_XPATH, 'Agility'),
        ('reactions', _SPAN_STAT_XPATH, 'Reactions'),
        ('balance', _SPAN_STAT_XPATH, 'Balance'),
        ('gk_diving', _LIST_STAT_XPATH, 'GK Diving'),
        ('gk_handling', _LIST_STAT_XPATH, 'GK Handling'),
        ('gk_kicking', _LIST_STAT_XPATH, 'GK Kicking'),
        ('gk_positioning', _LIST_STAT_XPATH, 'GK Positioning'),
        ('gk_reflexes', _LIST_STAT_XPATH, 'GK Reflexes'),
        ('shot_power', _SPAN_STAT_XPATH, 'Shot Power'),
        ('jumping', _SPAN_STAT_XPATH, 'Jumping'),
        ('stamina', _SPAN_STAT_XPATH, 'Stamina'),
        ('strength', _SPAN_STAT_XPATH, 'Strength'),
        ('long_shots', _SPAN_STAT_XPATH, 'Long Shots'),
    )

    # Field name doubles as the label shown on the page.
    _POSITION_XPATH = "(.//div[../div='{position}']/following::text())[1]"
    _POSITIONS = (
        'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
        'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM',
        'LWB', 'LDM', 'CDM', 'RDM', 'RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB',
    )

    def start_requests(self):
        """Yield one request per stored player URL, handled by :meth:`parse`."""

        for url in self.urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Load every ``SofifaItem`` field from a player page and yield the item.

        Also maintains two crawler stats: ``pages_to_visit`` (number of stored
        URLs) and ``page_counter`` (incremented once per parsed response).
        Fields are added in a fixed order; several fields share one XPath and
        presumably rely on the item's field processors to extract their value.
        """

        self.crawler.stats.set_value('pages_to_visit', len(self.urls))

        loader = ItemLoader(item=SofifaItem(), response=response)
        col_4_loader = loader.nested_xpath(
            ".//div[@class='column col-4 text-center']")

        # GENERAL PLAYER INFORMATION

        heading_xpath = ".//div[@class='info']/h1/text()"
        meta_tail_xpath = (
            ".//div[@class='meta']/text()/following-sibling::text()[last()]")

        loader.add_xpath('id', heading_xpath)
        loader.add_xpath('name', heading_xpath)
        loader.add_xpath('full_name', ".//div[@class='meta']/text()")
        for field in ('age', 'dob', 'height', 'weight'):
            loader.add_xpath(field, meta_tail_xpath)
        loader.add_xpath('nationality', ".//div[@class='meta']/a/@title")

        # GENERAL PLAYER STATS

        for field, label in (
                ('preferred_foot', 'Preferred Foot'),
                ('international_reputation', 'International Reputation'),
                ('weak_foot', 'Weak Foot'),
                ('skill_moves', 'Skill Moves'),
        ):
            loader.add_xpath(field, self._LABEL_TEXT_XPATH.format(label=label))

        for field, label in (
                ('work_rate', 'Work Rate'),
                ('body_type', 'Body Type'),
                ('real_face', 'Real Face'),
        ):
            loader.add_xpath(field, self._LABEL_SPAN_XPATH.format(label=label))

        # CLUB/TEAM INFORMATION

        col_4_loader.add_xpath(
            'value',
            "following::text()[contains(., 'Value')]/following::span[1]/text()"
        )
        col_4_loader.add_xpath(
            'wage',
            "following::text()[contains(., 'Wage')]/following::span[1]/text()")
        loader.add_xpath(
            'release_clause',
            self._LABEL_SPAN_XPATH.format(label='Release Clause'))
        loader.add_xpath('club_name', "(.//ul[@class='pl']//a/text())[1]")
        loader.add_xpath(
            'club_rating',
            ".//div[@class='column col-4'][3]/ul/li[2]/span/text()")
        loader.add_xpath(
            'club_position',
            "(.//label[text()='Position']/following::text()[1])[1]")
        loader.add_xpath(
            'club_jersey_number',
            "(.//label[text()='Jersey Number']/following::text()[1])[1]")
        loader.add_xpath('club_join_date',
                         ".//label[text()='Joined']/following::text()[1]")
        loader.add_xpath(
            'loaned_from',
            ".//label[text()='Loaned From']/following::a[1]/text()")
        loader.add_xpath(
            'club_contract_end_date',
            ".//label[text()='Contract Valid Until']/following::text()[1]")
        loader.add_xpath('team_name', "(.//ul[@class='pl']//a/text())[2]")
        loader.add_xpath(
            'team_rating',
            ".//div[@class='column col-4'][4]/ul/li[2]/span/text()")
        loader.add_xpath(
            'team_position',
            "(.//label[text()='Position']/following::text()[1])[2]")
        loader.add_xpath(
            'team_jersey_number',
            "(.//label[text()='Jersey Number']/following::text()[1])[2]")

        # PLAYER GAME STATS

        loader.add_xpath(
            'overall_rating', "(.//div[@class='column col-4 text-center']"
            "/preceding::text()[contains(.,'Overall Rating')])[2]/following::span[1]/text()"
        )
        col_4_loader.add_xpath(
            'potential_rating',
            "following::text()[contains(., 'Potential')]/following::span[1]"
            "/text()")
        loader.add_xpath('positions', ".//div[@class='meta']/span/text()")
        loader.add_xpath('unique_attributes', ".//div[@class='mt-2']/a/text()")

        # Goalkeepers and outfield players get different summary stat names;
        # both sets are loaded from the same script text.
        listed_positions = response.xpath(
            ".//div[@class='meta']/span/text()").getall()
        if 'GK' in listed_positions:
            summary_fields = self._GOALKEEPER_SUMMARY_FIELDS
        else:
            summary_fields = self._OUTFIELD_SUMMARY_FIELDS
        for field in summary_fields:
            loader.add_xpath(field, self._RATING_SCRIPT_XPATH)

        # PLAYER DETAILED STATS

        for field, template, label in self._DETAILED_STATS:
            loader.add_xpath(field, template.format(label=label))
        loader.add_xpath(
            'traits',
            ".//h5[text()='Traits']/following-sibling::ul/li/span/text()")

        # PLAYER REAL OVERALL RATING (POSITIONAL STATS)

        for position in self._POSITIONS:
            loader.add_xpath(position,
                             self._POSITION_XPATH.format(position=position))

        # COMMUNITY INFORMATION

        for field, label in (('followers', 'Follow'), ('likes', 'Like'),
                             ('dislikes', 'Dislike')):
            loader.add_xpath(
                field,
                f"(.//div[@class='operation mt-2']/a/text()[contains(.,'{label}')]"
                "/following::span)[1]/text()")

        # MEDIA

        loader.add_xpath('face_img', ".//div/div/article/div/img//@data-src")
        loader.add_xpath('flag_img', ".//div[@class='meta']/a/img/@data-src")
        loader.add_xpath('club_logo_img',
                         "(.//div/ul/li/figure/img/@data-src)[1]")
        loader.add_xpath('team_logo_img',
                         "(.//div/ul/li/figure/img/@data-src)[2]")

        self.logger.info(f'Parse function called on {response.url}')

        # Count this page before reporting progress; reading the stat first
        # made the very first message say "page None".
        # TODO: enable continued logging of page_counter after a pause/resume.
        self.crawler.stats.inc_value(key='page_counter', count=1, start=0)

        self.logger.info(
            f"Currently on page {self.crawler.stats.get_value('page_counter')} out of "
            f"{self.crawler.stats.get_value('pages_to_visit')}")

        self.logger.debug('User-Agent: %s',
                          response.request.headers['User-Agent'])

        yield loader.load_item()
Пример #4
0
class SofifaTeamPagesSpider(scrapy.Spider):
    """Spider ``team_details``: loads a ``NationalTeamDetailedStats`` item per team page.

    The pages to visit are the ``team_page`` values stored in the
    ``sofifa.team_urls`` MongoDB collection on ``localhost:27017``.
    """

    name = 'team_details'
    allowed_domains = ['sofifa.com']

    custom_settings = sofifa_settings(name=name, proxies=proxies, user_agent=user_agent, collection='team_details',
                                      validator='TeamItem')

    # --- XPath building blocks used by parse() -----------------------------

    # <meter> value / nested <span> text following a tactics label.
    _METER_XPATH = "(.//dl//span/preceding::span[text()='{label}']/following::div/meter)[1]/@value"
    _OPTION_XPATH = "(.//dl//span/preceding::span[text()='{label}']/following::span)[1]/span/text()"

    # (item field, XPath relative to the 'column col-6' nested loader).
    _TEAM_DETAIL_XPATHS = (
        ('home_stadium', ".//following::label[contains(., 'Home Stadium')]/following::text()[1]"),
        ('rival_team', ".//following::label[contains(., 'Rival Team')]/following::a[1]/text()"),
        ('international_prestige',
         ".//following::label[contains(., 'International Prestige')]/following::span[1]/text()"),
        ('starting_xi_average_age',
         ".//following::label[contains(., 'Starting XI Average Age')]/following::text()[1]"),
        ('whole_team_average_age',
         ".//following::label[contains(., 'Whole Team Average Age')]/following::text()[1]"),
        ('captain', ".//following::label[contains(., 'Captain')]/following::a[1]/@href"),
        ('short_free_kick', ".//following::label[text()='Short Free Kick']/following::a[1]/@href"),
        ('long_free_kick', ".//following::label[text()='Long Free Kick']/following::a[1]/@href"),
        ('left_short_free_kick', ".//following::label[text()='Left Short Free Kick']/following::a[1]/@href"),
        ('right_short_free_kick', ".//following::label[text()='Right Short Free Kick']/following::a[1]/@href"),
        ('penalties', ".//following::label[text()='Penalties']/following::a[1]/@href"),
        ('left_corner', ".//following::label[text()='Left Corner']/following::a[1]/@href"),
        ('right_corner', ".//following::label[text()='Right Corner']/following::a[1]/@href"),
    )

    # (item field, XPath relative to the response) in loading order.
    _TACTICS_XPATHS = (
        ('defence_defensive_style', ".//dl//span/preceding::dd[text()='Defensive Style']/span/span/text()"),
        ('defence_team_width', _METER_XPATH.format(label='Team Width')),
        ('defence_depth', _METER_XPATH.format(label='Depth')),
        ('offense_offensive_style', ".//dl//span/preceding::dd[text()='Offensive Style']/span/span/text()"),
        ('offense_width', _METER_XPATH.format(label='Width')),
        ('offense_players_in_box', _METER_XPATH.format(label='Players in box')),
        ('offense_corners', _METER_XPATH.format(label='Corners')),
        ('offense_free_kicks', _METER_XPATH.format(label='Free Kicks')),
        ('build_up_play_speed', ".//dl//span/preceding::span[text()='Speed']/following::span/text()"),
        ('build_up_play_dribbling', "(.//dl//span/preceding::dd[text()='Dribbling']//span)[1]/span/text()"),
        ('build_up_play_passing', _OPTION_XPATH.format(label='Passing')),
        ('build_up_play_positioning',
         "(.//dl//span/preceding::span[text()='Positioning'])[1]/following::span[1]/text()"),
        # This field used to reuse the 'Shooting' selector, duplicating
        # chance_creation_shooting. It now targets the second 'Passing' label
        # (the first one feeds build_up_play_passing).
        # NOTE(review): selector derived by analogy with the '[2]' selectors
        # used for 'Positioning' and 'Team Width' - confirm against live markup.
        ('chance_creation_passing',
         "(.//dl//span/preceding::span[text()='Passing'])[2]/following::span[1]/span/text()"),
        ('chance_creation_crossing', _OPTION_XPATH.format(label='Crossing')),
        ('chance_creation_shooting', _OPTION_XPATH.format(label='Shooting')),
        ('chance_creation_positioning',
         "(.//dl//span/preceding::span[text()='Positioning'])[2]/following::span[1]/text()"),
        ('defence_extra_pressure', _OPTION_XPATH.format(label='Pressure')),
        ('defence_extra_aggression', _OPTION_XPATH.format(label='Aggression')),
        ('defence_extra_team_width', "(.//span[text()='Team Width'])[2]/following::span[1]/span/text()"),
        ('defence_extra_defender_line', ".//span[text()='Defender Line']/following::span/text()"),
    )

    def start_requests(self):
        """Read the stored team URLs and yield one request per URL.

        The URL list is fully materialised inside the ``with`` block so the
        MongoDB connection is closed before any request is yielded.
        """

        with MongoClient('localhost', 27017) as client:
            collection = client.sofifa.team_urls
            # ``$exists`` takes a boolean; the string 'true' only worked
            # because any non-empty string is truthy.
            urls = [doc["team_page"] for doc in collection.find({'team_page': {'$exists': True}})]

        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Load every ``NationalTeamDetailedStats`` field from a team page and yield the item.

        Fields are added in a fixed order through the main loader and two
        nested loaders scoped to the 'operation mt-2' links and the
        'column col-6' block.
        """

        loader = ItemLoader(NationalTeamDetailedStats(), response=response)
        mt_2_loader = loader.nested_xpath(".//div[@class='operation mt-2']/a")
        col_6_loader = loader.nested_xpath(".//div[@class='column col-6']")

        # GENERAL CLUB INFORMATION

        # 'id' and 'team_name' read the same heading text; presumably the
        # item's field processors extract the respective parts.
        heading_xpath = ".//div[@class='info']/h1/text()"
        loader.add_xpath('id', heading_xpath)
        loader.add_xpath('team_name', heading_xpath)
        loader.add_xpath('team_logo', ".//div[@class='card card-border player fixed-width']/img/@data-src")
        loader.add_xpath('flag', ".//div[@class='meta']//a[last()-1]//img/@data-src")

        # GENERAL TEAM STATS

        for field, label in (('overall', 'Overall'), ('attack', 'Attack'), ('midfield', 'Midfield')):
            loader.add_xpath(field, "(.//div[@class='column col-4 text-center']"
                                    f"/preceding::text()[contains(.,'{label}')])[2]/following::span[1]/text()")
        # 'Defence' is located after the block rather than before it.
        loader.add_xpath('defence', "(.//div[@class='column col-4 text-center']"
                                    "/following::text()[contains(.,'Defence')])[1]/following::span[1]/text()")

        # DETAILED TEAM STATS

        for field, xpath in self._TEAM_DETAIL_XPATHS:
            col_6_loader.add_xpath(field, xpath)
        loader.add_xpath('starting_xi', ".//div[@class='field-player']/a/@href")

        # TACTICS

        for field, xpath in self._TACTICS_XPATHS:
            loader.add_xpath(field, xpath)

        # PLAYERS

        loader.add_xpath('squad', "(.//table)[1]/tbody/tr//a[contains(@href, '/player/')]/@href")
        loader.add_xpath('on_loan', "(.//table)[2]/tbody/tr//a[contains(@href, '/player/')]/@href")

        # MEDIA

        loader.add_xpath('kits', ".//div[@class='column col-sm-5 text-center']//img/@src")

        # COMMUNITY

        mt_2_loader.add_xpath('likes', "text()[contains(.,'Like')]/following::span[1]/text()")
        mt_2_loader.add_xpath('dislikes', "text()[contains(.,'Dislike')]/following::span[1]/text()")

        self.logger.debug('User-Agent: %s', response.request.headers['User-Agent'])
        self.logger.info(f'Parse function called on {response.url}')

        yield loader.load_item()