def get_single_element_BY(self, target):
    """Locate one element on the current page from a ``{type: target}`` dict.

    Only a single pair of ``target`` is used: the one ``dict.popitem()``
    yields from a copy of it, so the caller's dictionary is never mutated.

    Args:
        target: Dictionary mapping a locator type (``CLASS``, ``ID``,
            ``XPATH`` or ``TAG``) to the locator value.

    Returns:
        Whatever the matching private ``__get_element_by_*`` helper returns,
        or ``None`` when the type is unsupported or the lookup raised an
        ``Exception`` (which is printed, not propagated).

    Raises:
        AssertionError: If ``target`` is not a dictionary.
    """
    element = None
    assert isinstance(
        target, dict), 'Target must be a dictionary with key:value [type:target]'
    # Bound before the ``try`` so the handlers below never hit an unbound
    # local when ``popitem()`` itself fails (e.g. on an empty dictionary).
    _type = _target = None
    try:
        _type, _target = target.copy().popitem()
        # Compare by value: an identity test (``is``) only succeeds when the
        # key is the very same object as the module-level constant.
        if _type == CLASS:
            element = self.__get_element_by_class(_target)
        elif _type == ID:
            element = self.__get_element_by_id(_target)
        elif _type == XPATH:
            element = self.__get_element_by_xpath(_target)
        elif _type == TAG:
            element = self.__get_element_by_tag(_target)
        else:
            print('Error - Type ( {} ).'.format(_type))
    except Exception as e:
        # str() keeps the colour helpers fed with text even when the pair
        # could not be extracted and both names are still None.
        print(
            'Exception: {}: {} -> {} - is not present in page: {}'.format(
                e, c.blue(str(_type)), c.red(str(_target)),
                c.orange(self.webBrowser.current_url)))
    finally:
        # Successful lookups are echoed only when logging is enabled.
        if self.log and element is not None:
            print('{} ---> {}'.format(c.blue(_type), c.green(_target)))
    return element
def find_elements_BY(self, element, target):
    """Search inside ``element`` using a ``{type: target}`` dict.

    Only a single pair of ``target`` is used: the one ``dict.popitem()``
    yields from a copy of it, so the caller's dictionary is never mutated.

    Args:
        element: Object handed to the private ``__find_elements_by_*``
            helper as the search root.
        target: Dictionary mapping a locator type (``CLASS``, ``ID``,
            ``XPATH``, ``TAG`` or ``LINK``) to the locator value.

    Returns:
        Whatever the matching private helper returns, or ``None`` when the
        type is unsupported or the lookup raised an ``Exception`` (which is
        printed, not propagated).

    Raises:
        AssertionError: If ``target`` is not a dictionary.
    """
    nested_element = None
    assert isinstance(
        target, dict), 'Target must be a dictionary with key:value [type:target]'
    # Bound before the ``try`` so the handlers below never hit an unbound
    # local when ``popitem()`` itself fails (e.g. on an empty dictionary).
    _type = _target = None
    try:
        _type, _target = target.copy().popitem()
        # Compare by value: an identity test (``is``) only succeeds when the
        # key is the very same object as the module-level constant.
        if _type == CLASS:
            nested_element = self.__find_elements_by_class(element, _target)
        elif _type == ID:
            nested_element = self.__find_elements_by_id(element, _target)
        elif _type == XPATH:
            nested_element = self.__find_elements_by_xpath(element, _target)
        elif _type == TAG:
            nested_element = self.__find_elements_by_tag(element, _target)
        elif _type == LINK:
            nested_element = self.__find_elements_by_partial_link_text(
                element, _target)
        else:
            # Report an unsupported locator type instead of returning None
            # without any trace.
            print('Error - Type ( {} ).'.format(_type))
    except Exception as e:
        # str() keeps the colour helpers fed with text even when the pair
        # could not be extracted and both names are still None.
        print(
            'Exception: {}: {} -> {} - is not present in page: {}'.format(
                e, c.blue(str(_type)), c.red(str(_target)),
                c.orange(self.webBrowser.current_url)))
    finally:
        # Successful lookups are echoed only when logging is enabled.
        if self.log and nested_element is not None:
            print('{} ---> {}'.format(c.blue(_type), c.green(_target)))
    return nested_element
def __init__(self, delay=10, log=False, headless=False, tor=False):
    """Start a Firefox WebDriver session and store the scraper settings.

    Args:
        delay: Timeout given to ``WebDriverWait``; also kept as
            ``self.delay``.
        log: Stored as ``self.log``.
        headless: When true, Firefox receives the ``-headless`` argument.
        tor: When true, the profile is pointed at a SOCKS proxy on
            ``127.0.0.1:9150`` and a reminder is printed.
    """
    # geckodriver binary resolved as a resource of the package.
    driver_path = pkg_resources.resource_filename(pkg_name, geckodriver_file)
    ff_profile = webdriver.FirefoxProfile()

    if tor:
        print(c.red('Warning '), c.blue(
            'Be sure you have Tor browser opened in backgroud!'))
        print('Tor Proxy', c.green('Enabled'))
        # NOTE(review): 'network.proxy.socks_remote_dns' is not set here, so
        # DNS lookups may not go through the proxy - confirm this is intended.
        proxy_prefs = (
            ('network.proxy.type', 1),
            ('network.proxy.socks', '127.0.0.1'),
            ('network.proxy.socks_port', 9150),
        )
        for pref_name, pref_value in proxy_prefs:
            ff_profile.set_preference(pref_name, pref_value)

    # Every cache-related preference is switched off.
    cache_prefs = (
        "browser.cache.disk.enable",
        "browser.cache.memory.enable",
        "browser.cache.offline.enable",
        "network.http.use-cache",
    )
    for pref_name in cache_prefs:
        ff_profile.set_preference(pref_name, False)

    ff_options = Options()
    if headless:
        ff_options.add_argument('-headless')

    self.webBrowser = webdriver.Firefox(
        firefox_profile=ff_profile,
        executable_path=driver_path,
        firefox_options=ff_options)
    self.delay = delay
    self.log = log
    self.log_file = 'geckodriver.log'
    self.wait = WebDriverWait(self.webBrowser, timeout=delay)
def next(self, check=False):
    """Follow the link found inside the ``Target.ListAd.button_next`` element.

    Args:
        check: When true, only report whether a link was found; nothing is
            opened or printed in that case.

    Returns:
        ``True`` when a link was found, ``False`` otherwise (after printing
        ``Finish``).
    """
    tag = c.green('@next')
    next_button = Page.scraper.get_element_BY(Target.ListAd.button_next)
    next_link = Page.scraper.find_elements_BY(next_button, Target.ListAd.link)

    # Nothing to follow: report the end and stop.
    if not next_link:
        print(Page.I + self.I + tag, c.red('Finish'))
        return False

    # In check mode the caller only wants to know that a link exists.
    if not check:
        Page.scraper.openUrl(next_link)
        print(Page.I + self.I + tag, c.underline('Next'))
    return True
def listAdPage(self, n_page):
    """Store the result of ``self.listad.pages_links(n_page)`` in ``self.ad_links``.

    Any ``Exception`` raised by that call is printed in red instead of being
    propagated; ``self.ad_links`` is then left as it was.

    Args:
        n_page: Value forwarded unchanged to ``pages_links``.
    """
    try:
        self.ad_links = self.listad.pages_links(n_page)
    except Exception as e:
        # Hand the colour helper text rather than the exception object
        # itself, matching how ``run`` reports its errors.
        print(c.red(str(e)))
def run(self):
    """Call ``self.task()`` and print, in red, any ``Exception`` it raises.

    The exception is not propagated to the caller.
    """
    try:
        self.task()
    except Exception as error:
        message = str(error)
        print(c.red(message))