# Crawl proxy listing sites, validate the candidates, and merge the
# survivors into the proxy file named in the configuration.
from optparse import OptionParser
import configparser

import proxy
import realtime
import worker

parser = OptionParser()
parser.add_option("--config", dest="config",
                  help="Name of Configuration File", default=None)
parser.add_option("--workers", type="int", dest="workers",
                  help="Number of workers", default=50)
(options, args) = parser.parse_args()

configParser = configparser.ConfigParser()
configParser.read(options.config)

# Update proxies
proxy_sites = configParser.get("Data Feed", "proxy_sites").split(",")
proxy_links = int(configParser.get("Data Feed", "proxy_links"))
proxy_file = configParser.get("Data Feed", "proxy_file")

proxies = proxy.check_proxies(proxy.crawl(proxy_sites, proxy_links),
                              workers=options.workers)

# Merge the freshly validated proxies with those already on disk,
# deduplicating the combined list before writing it back.
with open(proxy_file, 'r') as f:
    original_proxies = f.read().split("\n")
original_proxies = [p for p in original_proxies if p != ""]
proxies = list(set(proxies + original_proxies))

with open(proxy_file, 'w') as f:
    for p in proxies:
        print(p, file=f)
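# For reference, an illustrative "Data Feed" section that would satisfy
# the configparser lookups above and below. The concrete values and URLs
# are assumptions for illustration, not taken from the project:
#
#   [Data Feed]
#   proxy_sites = https://example.com/proxies,https://example.org/list
#   proxy_links = 2
#   proxy_file = proxies.txt
#   pause_wait = 1.0
#   resume_wait = 5.0
#   data_feed_port = 5556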
import zmq

pause_wait = float(configParser.get("Data Feed", "pause_wait"))
resume_wait = float(configParser.get("Data Feed", "resume_wait"))

# Set up proxies: prefer the proxy file on disk; if it is missing or the
# config option is absent, fall back to crawling a fresh list.
try:
    with open(configParser.get("Data Feed", "proxy_file"), 'r') as f:
        proxies = f.read().split("\n")
    proxies = [p for p in proxies if p != ""]
except (OSError, configparser.Error):
    proxy_sites = configParser.get("Data Feed", "proxy_sites").split(",")
    proxy_links = int(configParser.get("Data Feed", "proxy_links"))
    proxies = proxy.crawl(proxy_sites, proxy_links)
    proxies = proxy.check_proxies(proxies)

# If no usable proxies were found, fall back to a direct connection.
if len(proxies) < 1:
    proxies = [None]

# Set up publisher: bind a ZeroMQ PUB socket on the configured port so
# downstream subscribers can receive the data feed.
data_feed_port = configParser.get("Data Feed", "data_feed_port")
context = zmq.Context()
socket = context.socket(zmq.PUB)
socket.bind("tcp://*:%s" % data_feed_port)
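# A minimal subscriber sketch for testing the publisher above, assuming
# the subscriber runs on the same host and the feed uses plain string
# messages; the localhost address, port, and empty-topic subscription
# are assumptions to adjust for your deployment.
#
#   import zmq
#
#   context = zmq.Context()
#   sub = context.socket(zmq.SUB)
#   sub.connect("tcp://localhost:5556")       # same port as data_feed_port
#   sub.setsockopt_string(zmq.SUBSCRIBE, "")  # subscribe to every message
#   while True:
#       print(sub.recv_string())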