def setTorProxy():
    """Route all new sockets through a TOR SOCKS5 proxy, if enabled in config.

    Reads ``clientConfig`` from the module-level ``CFG_CRAWLER`` and, when
    ``tor_enabled`` is true, monkey-patches ``socket.socket`` so every
    subsequent connection goes through the configured SOCKS proxy.
    """
    # Early exit when TOR routing is switched off in the crawler config.
    if not CFG_CRAWLER.get("clientConfig").get("tor_enabled", False):
        return
    proxyHost = CFG_CRAWLER.get("clientConfig").get("tor_proxyadress", "localhost")
    proxyPort = CFG_CRAWLER.get("clientConfig").get("tor_proxyport", 9050)
    # Route an HTTP request through the SOCKS proxy by replacing the
    # global socket factory (process-wide side effect).
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxyHost, proxyPort)
    socket.socket = socks.socksocket
def send_supergros(self, xml_file):
    """Upload ``xml_file`` to the Supergros FTP drop directory.

    When ``config.use_proxy`` is set, all sockets are first rerouted
    through a local SOCKS5 proxy (127.0.0.1:9999) — note this is a
    process-wide side effect on ``socket.socket``.

    Args:
        xml_file: the XML payload handed to ``self.do_ftp`` for transfer.
    """
    if config.use_proxy:
        logger.info('connecting to proxy')
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9999)
        socket.socket = socks.socksocket
    # Typo fix: original message read 'ftp-ing ro supergros'.
    logger.info('ftp-ing to supergros')
    ftp = ftplib.FTP(config.supergros_server)
    ftp.login(config.supergros_user, config.supergros_password)
    ftp.cwd('PROD/EDISGR/D0901-FROM-EDISGR')
    # NOTE(review): the FTP connection is not closed here — presumably
    # do_ftp() takes ownership and quits the session; confirm.
    self.do_ftp(ftp, xml_file)
def setTorProxy():
    """Install a TOR SOCKS5 proxy as the default route for all sockets.

    Consults ``CFG_CRAWLER``'s ``clientConfig`` section; when
    ``tor_enabled`` is true, patches ``socket.socket`` so every new
    connection is tunnelled through the configured proxy.
    """
    # Fetch the client section once instead of repeating the lookup.
    clientCfg = CFG_CRAWLER.get("clientConfig")
    if clientCfg.get("tor_enabled", False):
        host = clientCfg.get("tor_proxyadress", "localhost")
        port = clientCfg.get("tor_proxyport", 9050)
        # Route an HTTP request through the SOCKS proxy (global effect:
        # replaces the socket factory for the whole process).
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, host, port)
        socket.socket = socks.socksocket
def runCrawler():
    """Run every enabled crawler plugin and merge their results.

    Loads ``crawler.conf``, optionally routes traffic through TOR,
    discovers plugins under the ``crawler`` directory via yapsy's
    PluginManager, and calls ``run()`` on each enabled plugin.

    Returns:
        dict: aggregated URL map built from each plugin's ``run()`` result.
    """
    mapURL = {}
    cfgCrawler = Config(os.path.join(RAGPICKER_ROOT, 'config', 'crawler.conf'))

    # TOR Socks proxy
    isTorEnabled = cfgCrawler.get("clientConfig").get("tor_enabled", False)
    if isTorEnabled:
        torProxyAdress = cfgCrawler.get("clientConfig").get("tor_proxyadress", "localhost")
        torProxyPort = cfgCrawler.get("clientConfig").get("tor_proxyport", 9050)
        # Route an HTTP request through the SOCKS proxy (process-wide patch).
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, torProxyAdress, torProxyPort)
        socket.socket = socks.socksocket

    # Build the PluginManager
    crawlerPluginManager = PluginManager()
    crawlerPluginManager.setPluginPlaces(["crawler"])
    crawlerPluginManager.collectPlugins()

    # Trigger run from the "Crawler" plugins
    for pluginInfo in crawlerPluginManager.getAllPlugins():
        crawlerModul = pluginInfo.plugin_object
        # Config for crawler module
        try:
            options = cfgCrawler.get(pluginInfo.name)
            crawlerModul.set_options(options)
        except Exception:
            log.error("Crawler module %s not found in configuration file", pluginInfo.name)
            # BUG FIX: without this `continue`, `options` is unbound (first
            # iteration) or stale from the previous plugin, so the
            # `options.enabled` check below would crash or misbehave.
            continue
        # If the crawler module is disabled in the config, skip it.
        if not options.enabled:
            continue
        # Lazy %-formatting instead of string concatenation.
        log.info("Run Crawler: %s", pluginInfo.name)
        try:
            returnMap = crawlerModul.run()
            mapURL.update(returnMap)
        except Exception as e:
            # A failing plugin must not abort the remaining crawlers.
            log.error('Error (%s) in %s', e, pluginInfo.name)

    return mapURL