示例#1
0
    def scrape_email(self):
        """Read the e-mail address from the profile's 'Contact info' popup.

        Runs a script that clicks anchors whose inner HTML contains
        'Contact info', calls wait_for_loading(), runs a second script that
        looks for the 'ci-email' contact entry, and finally makes a
        best-effort attempt to click the modal's dismiss button.

        Returns:
            The value produced by the lookup script (the inner text of the
            e-mail entry when it is found), or '' when running that script
            raises WebDriverException.
        """
        # Both scripts wrap their own body in a JavaScript try/catch, so
        # errors inside the page are swallowed on the browser side.
        open_popup_js = (
            "(function(){try{for(i in document.getElementsByTagName('a')){let el = document.getElementsByTagName("
            "'a')[i]; if(el.innerHTML.includes('Contact info')){el.click();}}}catch(e){}})()"
        )
        read_email_js = (
            "return (function(){try{for (i in document.getElementsByClassName('pv-contact-info__contact-type')){ "
            "let el = document.getElementsByClassName('pv-contact-info__contact-type')[i]; if("
            "el.className.includes( 'ci-email')){ return el.children[2].children[0].innerText; } }} catch(e){"
            "return '';}})()"
        )
        close_popup_js = "document.getElementsByClassName('artdeco-modal__dismiss')[0].click()"

        # > open the popup; a WebDriverException raised here is not handled
        self.browser.execute_script(open_popup_js)
        wait_for_loading()

        # > read the address, keeping '' if the browser call fails
        address = ''
        try:
            address = self.browser.execute_script(read_email_js)
        except WebDriverException:
            pass

        # > dismiss the popup; failure to do so is deliberately ignored
        try:
            self.browser.execute_script(close_popup_js)
        except WebDriverException:
            pass

        return address
 def scrape_picture(self):
     """Return the `src` of the first profile-photo element on the page.

     Calls wait_for_loading() first, then queries the first element matching
     '.pv-top-card__photo' or '.profile-photo-edit__preview'.

     Returns:
         The value returned by the script, or '' when the browser raises
         WebDriverException.
     """
     wait_for_loading()
     photo_js = "return document.querySelectorAll('.pv-top-card__photo, .profile-photo-edit__preview')[0].src"
     try:
         return self.browser.execute_script(photo_js)
     except WebDriverException:
         return ''
    def open_contact_info(self):
        """Open the 'Contact info' popup unless it is already flagged as open.

        When `self.contact_info_open` is truthy nothing is executed. Otherwise
        the 'contact_see_more' control is clicked through a script,
        wait_for_loading() is called and `self.contact_info_open` is set.

        Returns:
            True when the popup was already flagged open or the script and
            the wait completed without a WebDriver error; False when
            WebDriverException was raised.
        """
        if self.contact_info_open:
            return True

        try:
            # > click on 'Contact info' link on the page
            # NOTE: the script wraps the click in its own JavaScript try/catch,
            # so a missing link does not raise here and True is still returned.
            self.browser.execute_script(
                "(function(){try{document.querySelector('[data-control-name=contact_see_more]').click();}catch(e){}})()"
            )
            wait_for_loading()
        except WebDriverException:
            # Only browser failures are treated as "could not open"; a bare
            # `except:` would also hide KeyboardInterrupt/SystemExit and
            # programming errors.
            return False

        self.contact_info_open = True
        return True
示例#4
0
    def scrape_about_profile(self):
        """Return the text of the profile's 'about' summary.

        Makes a best-effort click on the first 'lt-line-clamp__more' element
        (the show-more button), calls wait_for_loading(), then reads the inner
        text of the first 'pv-about__summary-text' element.

        Returns:
            The value returned by the read script, or '' when the browser
            raises WebDriverException while reading.
        """
        expand_js = "document.getElementsByClassName('lt-line-clamp__more')[0].click()"
        summary_js = " return document.getElementsByClassName('pv-about__summary-text')[0].innerText;"

        # > click the show-more button; a failed click is ignored
        try:
            self.browser.execute_script(expand_js)
        except WebDriverException:
            pass

        wait_for_loading()

        try:
            summary = self.browser.execute_script(summary_js)
        except WebDriverException:
            summary = ''
        return summary
    def scrape_skills(self):
        """Return the skill names listed in the profile's skills section.

        Clicks the first 'pv-skills-section__additional-skills' element,
        calls wait_for_loading(), then collects the inner text of the
        'pv-skill-category-entity__name-text' element inside every
        'pv-skill-category-entity'.

        Returns:
            The list produced by the collecting script, or [] when either
            browser call raises WebDriverException.
        """
        expand_js = "document.getElementsByClassName('pv-skills-section__additional-skills')[0].click()"
        collect_js = (
            "return (function(){els = document.getElementsByClassName('pv-skill-category-entity');results = ["
            "];for (var i=0; i < els.length; i++){results.push(els[i].getElementsByClassName("
            "'pv-skill-category-entity__name-text')[0].innerText);}return results;})()"
        )

        try:
            self.browser.execute_script(expand_js)
        except WebDriverException:
            # NOTE(review): a failed click ends the scrape with no skills at
            # all, even if some entries are presumably already visible -
            # confirm this is intended.
            return []

        wait_for_loading()

        try:
            skill_names = self.browser.execute_script(collect_js)
        except WebDriverException:
            skill_names = []
        return skill_names
    def load_full_page(self):
        """Scroll through the whole page, then click every inline 'see more'.

        The page is scrolled one window height at a time, calling
        wait_for_scrolling() after each step, until the next scroll target is
        no longer below `document.body.offsetHeight` (re-read before every
        step). Afterwards each 'pv-profile-section__see-more-inline' element
        is clicked by index, calling wait_for_loading() after every attempt.
        """
        viewport_height = self.browser.execute_script("return window.innerHeight")

        step = 1
        while True:
            target_y = step * viewport_height
            # The body height is queried again on each pass, as it is
            # re-evaluated in every loop-condition check.
            page_height = self.browser.execute_script("return document.body.offsetHeight")
            if not target_y < page_height:
                break
            self.browser.execute_script('window.scrollTo(0, {});'.format(target_y))
            wait_for_scrolling()
            step += 1

        # The number of 'see more' controls is read once, before any click.
        see_more_count = self.browser.execute_script(
            "return document.getElementsByClassName('pv-profile-section__see-more-inline').length")
        for index in range(see_more_count):
            click_js = "document.getElementsByClassName('pv-profile-section__see-more-inline')[{}].click()".format(
                index)
            try:
                self.browser.execute_script(click_js)
            except WebDriverException:
                # A control that cannot be clicked is skipped.
                pass

            wait_for_loading()