def add_canvas_permission(self):
    """Create a permission db (permissions.sqlite) and add an exception
    for canvas image extraction. Otherwise screenshots taken by Selenium
    will be just blank images due to the canvas fingerprinting defense
    in TBB."""
    connect_to_db = sqlite3.connect  # @UndefinedVariable
    perm_db = connect_to_db(
        os.path.join(self.prof_dir_path, "permissions.sqlite"))
    cursor = perm_db.cursor()
    # http://mxr.mozilla.org/mozilla-esr31/source/build/automation.py.in
    cursor.execute("PRAGMA user_version=3")
    cursor.execute("""CREATE TABLE IF NOT EXISTS moz_hosts (
        id INTEGER PRIMARY KEY,
        host TEXT,
        type TEXT,
        permission INTEGER,
        expireType INTEGER,
        expireTime INTEGER,
        appId INTEGER,
        isInBrowserElement INTEGER)""")
    domain = get_tld(self.page_url)
    wl_log.debug("Adding canvas/extractData permission for %s" % domain)
    # Use a parameterized query rather than string interpolation to avoid
    # quoting issues with the domain value.
    cursor.execute("INSERT INTO moz_hosts "
                   "VALUES(NULL,?,'canvas/extractData',1,0,0,0,0)",
                   (domain,))
    perm_db.commit()
    cursor.close()

def launch_tor_service(self, logfile='/dev/null'):
    """Launch Tor service and return the process."""
    self.log_file = logfile
    self.tmp_tor_data_dir = ut.clone_dir_with_timestap(
        cm.get_tor_data_path(self.tbb_version))
    self.torrc_dict.update({
        'DataDirectory': self.tmp_tor_data_dir,
        'Log': ['INFO file %s' % logfile]
    })
    wl_log.debug("Tor config: %s" % self.torrc_dict)
    try:
        self.tor_process = stem.process.launch_tor_with_config(
            config=self.torrc_dict,
            init_msg_handler=self.tor_log_handler,
            tor_cmd=cm.get_tor_bin_path(self.tbb_version),
            timeout=270)
        self.controller = Controller.from_port()
        self.controller.authenticate()
        # Log before returning; in the original this line was unreachable,
        # placed after the try/except in which every branch returns or exits.
        wl_log.info("Tor running at port {0} & controller port {1}.".format(
            cm.SOCKS_PORT, cm.CONTROLLER_PORT))
        return self.tor_process
    except stem.SocketError as exc:
        wl_log.critical("Unable to connect to tor on port %s: %s" %
                        (cm.SOCKS_PORT, exc))
        sys.exit(1)
    except Exception:
        # most of the time this is due to another instance of
        # tor running on the system
        wl_log.critical("Error launching Tor", exc_info=True)
        sys.exit(1)

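# Hedged usage sketch for launch_tor_service(): a minimal torrc_dict of the
# kind the method updates. 'driver' stands for a hypothetical instance of the
# owning class; cm.SOCKS_PORT and cm.CONTROLLER_PORT are the constants
# referenced in the log messages above. stem expects config values as strings.
driver.torrc_dict = {
    'SocksPort': str(cm.SOCKS_PORT),
    'ControlPort': str(cm.CONTROLLER_PORT),
}
tor_process = driver.launch_tor_service(logfile='/tmp/tor.log')
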
def start_capture(self, pcap_path=None, pcap_filter=""):
    """Start capture. Configure sniffer if arguments are given."""
    if cm.running_in_CI:
        wl_log.debug("CI run: will not run dumpcap")
        return False
    if pcap_filter:
        self.set_capture_filter(pcap_filter)
    if pcap_path:
        self.set_pcap_path(pcap_path)
    prefix = ""
    if cm.running_in_CI:
        prefix = "sudo "  # run as sudo in Travis CI since we cannot setcap
    command = '{}dumpcap -P -a duration:{} -a filesize:{} -i eth0 -s 0 -f \'{}\' -w {}'\
        .format(prefix, cm.SOFT_VISIT_TIMEOUT, cm.MAX_DUMP_SIZE,
                self.pcap_filter, self.pcap_file)
    wl_log.info(command)
    self.p0 = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    timeout = DUMPCAP_START_TIMEOUT  # in seconds
    while timeout > 0 and not self.is_dumpcap_running():
        time.sleep(0.1)
        timeout -= 0.1
    if timeout < 0:
        raise cm.DumpcapTimeoutError()
    else:
        wl_log.debug("dumpcap started in %s seconds" %
                     (DUMPCAP_START_TIMEOUT - timeout))
    self.is_recording = True

def start_capture(self, pcap_path=None, pcap_filter=""):
    """Start capture. Configure sniffer if arguments are given."""
    if pcap_filter:
        self.set_capture_filter(pcap_filter)
    if pcap_path:
        self.set_pcap_path(pcap_path)
    prefix = ""
    # Changed eth0 to the local test interface (WLAN);
    # the interface name should be moved to a config file.
    command = '{}dumpcap -P -a duration:{} -a filesize:{} -i {} -s 0 -f "{}" -w {}'\
        .format(prefix, cm.SOFT_VISIT_TIMEOUT, cm.MAX_DUMP_SIZE,
                self.netif, self.pcap_filter, self.pcap_file)
    wl_log.info(command)
    self.p0 = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    timeout = DUMPCAP_START_TIMEOUT  # in seconds
    while timeout > 0 and not self.is_dumpcap_running():
        time.sleep(0.1)
        timeout -= 0.1
    if timeout < 0:
        raise DumpcapTimeoutError()
    else:
        wl_log.debug("dumpcap started in %s seconds" %
                     (DUMPCAP_START_TIMEOUT - timeout))
    self.is_recording = True

def start_capture(self, pcap_path=None, pcap_filter=""):
    """Start capture. Configure sniffer if arguments are given."""
    if cm.running_in_CI:
        wl_log.debug("CI run: will not run dumpcap")
        return False
    if pcap_filter:
        self.set_capture_filter(pcap_filter)
    if pcap_path:
        self.set_pcap_path(pcap_path)
    prefix = ""
    if cm.running_in_CI:
        prefix = "sudo "  # run as sudo in Travis CI since we cannot setcap
    command = '{}dumpcap -a duration:{} -a filesize:{} -i any -s 0 -f \'{}\' -w {}'\
        .format(prefix, cm.SOFT_VISIT_TIMEOUT, cm.MAX_DUMP_SIZE,
                self.pcap_filter, self.pcap_file)
    wl_log.info(command)
    self.p0 = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    timeout = DUMPCAP_START_TIMEOUT  # in seconds
    while timeout > 0 and not self.is_dumpcap_running():
        time.sleep(0.1)
        timeout -= 0.1
    if timeout < 0:
        raise cm.DumpcapTimeoutError()
    else:
        wl_log.debug("dumpcap started in %s seconds" %
                     (DUMPCAP_START_TIMEOUT - timeout))
    self.is_recording = True

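# The polling loops in the start_capture() variants above rely on an
# is_dumpcap_running() helper that is not shown. A minimal sketch using
# psutil, offered as an assumption rather than the project's actual
# implementation:
import psutil

def is_dumpcap_running(self):
    """Return True if a dumpcap process is alive on the system."""
    for proc in psutil.process_iter(['name']):
        if proc.info['name'] == 'dumpcap':
            return True
    return False
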
def parse_arguments():
    # Read configuration file
    config = ConfigParser.RawConfigParser()
    config.read(cm.CONFIG_FILE)

    # Parse arguments
    parser = argparse.ArgumentParser(
        description='Crawl a list of URLs in multiple batches.')

    # List of urls to be crawled
    parser.add_argument('-u', '--url-file', required=True,
                        help='Path to the file that contains the list of URLs to crawl.',
                        default=cm.LOCALIZED_DATASET)
    parser.add_argument('-t', '--type',
                        choices=cm.CRAWLER_TYPES,
                        help="Crawler type to use for this crawl.",
                        default='Base')
    parser.add_argument('-o', '--output',
                        help='Directory to dump the results (default=./results).',
                        default=cm.CRAWL_DIR)
    parser.add_argument('-c', '--config',
                        help="Crawler tor driver and controller configurations.",
                        choices=config.sections(),
                        default="default")
    parser.add_argument('-b', '--tbb-path',
                        help="Path to the Tor Browser Bundle directory.",
                        default=cm.TBB_DIR)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity',
                        default=False)

    # Crawler features
    parser.add_argument('-x', '--virtual-display',
                        help='Dimensions of the virtual display, eg 1200x800',
                        default='')
    parser.add_argument('-s', '--screenshots', action='store_true',
                        help='Capture page screenshots',
                        default=False)

    # Limit crawl
    parser.add_argument('--start', type=int,
                        help='Select URLs from this line number: (default: 1).',
                        default=1)
    parser.add_argument('--stop', type=int,
                        help='Select URLs after this line number: (default: EOF).',
                        default=maxsize)

    # Parse arguments
    args = parser.parse_args()

    # Set verbose level
    wl_log.setLevel(DEBUG if args.verbose else INFO)
    del args.verbose

    # Change results dir if output
    cm.CRAWL_DIR = args.output
    del args.output

    wl_log.debug("Command line parameters: %s" % argv)
    return args, config

def post_visit(self):
    guard_ips = set(self.controller.get_all_guard_ips())
    wl_log.debug("Found %s guards in the consensus.", len(guard_ips))
    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guard_ips)
    except Exception as e:
        wl_log.error("ERROR: filtering pcap file: %s.", e)
        wl_log.error("Check pcap: %s", self.job.pcap_file)

def parse_arguments():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description='Crawl a list of URLs in multiple batches.')

    # List of urls to be crawled
    parser.add_argument('-u', '--url-list', required=True,
                        help='Path to the file that contains the list of URLs to crawl.')
    parser.add_argument('-o', '--output',
                        help='Directory to dump the results (default=./results).',
                        default=cm.RESULTS_DIR)
    parser.add_argument('-b', '--tbb-path',
                        help="Path to the Tor Browser Bundle directory.",
                        default=cm.TBB_PATH)
    parser.add_argument("-e", "--experiment",
                        choices=cm.EXP_TYPES,
                        help="Specifies the crawling methodology.",
                        default=cm.EXP_TYPE_WANG_AND_GOLDBERG)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity',
                        default=False)

    # For understanding batch and instance parameters please refer
    # to Wang and Goldberg's WPES'13 paper, Section 4.1.4
    parser.add_argument('--batches', type=int,
                        help='Number of batches in the crawl (default: %s)'
                             % cm.NUM_BATCHES,
                        default=cm.NUM_BATCHES)
    parser.add_argument('--instances', type=int,
                        help='Number of instances to crawl for each web page (default: %s)'
                             % cm.NUM_INSTANCES,
                        default=cm.NUM_INSTANCES)

    # Crawler features
    parser.add_argument('-x', '--xvfb', action='store_true',
                        help='Use XVFB (for headless testing)',
                        default=False)
    parser.add_argument('-c', '--capture-screen', action='store_true',
                        help='Capture page screenshots',
                        default=False)

    # Limit crawl
    parser.add_argument('--start', type=int,
                        help='Start crawling URLs from this line number: (default: 1).',
                        default=1)
    parser.add_argument('--stop', type=int,
                        help='Stop crawling URLs after this line number: (default: EOF).',
                        default=maxsize)

    # Parse arguments
    args = parser.parse_args()

    # Set verbose level
    wl_log.setLevel(DEBUG if args.verbose else INFO)
    del args.verbose

    wl_log.debug("Command line parameters: %s" % argv)
    return args

def take_screenshot(self):
    try:
        out_png = os.path.join(self.visit_dir, 'screenshot.png')
        wl_log.info("Taking screenshot of %s to %s" %
                    (self.page_url, out_png))
        self.tb_driver.get_screenshot_as_file(out_png)
        if cm.running_in_CI:
            wl_log.debug("Screenshot data:image/png;base64,%s" %
                         self.tb_driver.get_screenshot_as_base64())
    except Exception:
        wl_log.info("Exception while taking screenshot of: %s" %
                    self.page_url)

def pack_crawl_data(crawl_dir):
    """Compress the crawl dir into a tar archive."""
    if not os.path.isdir(crawl_dir):
        wl_log.critical("Cannot find the crawl dir: %s" % crawl_dir)
        return False
    if crawl_dir.endswith(os.path.sep):
        crawl_dir = crawl_dir[:-1]
    crawl_name = os.path.basename(crawl_dir)
    containing_dir = os.path.dirname(crawl_dir)
    os.chdir(containing_dir)
    arc_path = "%s.tar.gz" % crawl_name
    tar_cmd = "tar czvf %s %s" % (arc_path, crawl_name)
    wl_log.debug("Packing the crawl dir with cmd: %s" % tar_cmd)
    status, txt = commands.getstatusoutput(tar_cmd)
    if status or is_targz_archive_corrupt(arc_path):
        wl_log.critical("Tar command failed or archive is corrupt: %s\n"
                        "St: %s txt: %s" % (tar_cmd, status, txt))
        return False
    else:
        return True

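# pack_crawl_data() calls an is_targz_archive_corrupt() helper that is not
# shown. A minimal sketch, under the assumption that listing the archive is
# an adequate integrity check: 'tar tzf' exits non-zero if the gzip stream
# or the tar structure is damaged.
def is_targz_archive_corrupt(arc_path):
    """Return True if the tar.gz archive fails a listing-based check."""
    status, _ = commands.getstatusoutput("tar tzf %s" % arc_path)
    return status != 0
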
def filter_guards_from_pcap(self):
    guard_ips = set(self.tor_controller.get_all_guard_ips())
    wl_log.debug("Found %s guards in the consensus.", len(guard_ips))
    orig_pcap = self.pcap_path + ".original"
    copyfile(self.pcap_path, orig_pcap)
    try:
        preader = PcapReader(orig_pcap)
        pcap_filtered = []
        for p in preader:
            if IP not in p:
                pcap_filtered.append(p)
                continue
            ip = p.payload
            if ip.dst in guard_ips or ip.src in guard_ips:
                pcap_filtered.append(p)
        wrpcap(self.pcap_path, pcap_filtered)
    except Exception as e:
        wl_log.error("ERROR: filtering pcap file: %s. Check old pcap: %s",
                     e, orig_pcap)
    else:
        os.remove(orig_pcap)

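# filter_guards_from_pcap() assumes the scapy 2.x API and shutil.copyfile
# are available at module level; the imports it depends on would be:
from scapy.all import IP, PcapReader, wrpcap
from shutil import copyfile
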
def start_capture(self, device='', pcap_path=None, pcap_filter=""):
    """Start capture. Configure sniffer if arguments are given."""
    if pcap_filter:
        self.set_capture_filter(pcap_filter)
    if pcap_path:
        self.set_pcap_path(pcap_path)
    if device:
        self.device = device
    prefix = ""
    command = (
        "{}tshark -nn -T fields -E separator=, -e frame.time_epoch"
        " -e ip.src -e ip.dst -e tcp.srcport -e tcp.dstport"
        " -e ip.proto -e ip.len -e ip.hdr_len -e tcp.hdr_len -e data.len"
        " -e tcp.flags -e tcp.seq -e tcp.ack"
        " -e tcp.window_size_value -e _ws.expert.message "
        " -a duration:{} -a filesize:{} -s 0 -i {} -f '{}'"
        " -w {} > {}".format(prefix, cm.SOFT_VISIT_TIMEOUT,
                             cm.MAX_DUMP_SIZE, self.device,
                             self.pcap_filter, self.pcap_file,
                             '%s.tshark' % self.pcap_file[:-5]))
    wl_log.info(command)
    self.p0 = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    timeout = SNIFFER_START_TIMEOUT  # in seconds
    while timeout > 0 and not self.is_running():
        time.sleep(0.1)
        timeout -= 0.1
    if timeout < 0:
        raise SnifferTimeoutError()
    else:
        wl_log.debug("tshark started in %s seconds" %
                     (SNIFFER_START_TIMEOUT - timeout))
    self.is_recording = True

def close_all_streams(self):
    """Close all streams of a controller."""
    wl_log.debug("Closing all streams")
    try:
        ut.timeout(cm.STREAM_CLOSE_TIMEOUT)
        for stream in self.controller.get_streams():
            wl_log.debug("Closing stream %s %s %s " %
                         (stream.id, stream.purpose, stream.target_address))
            self.controller.close_stream(stream.id)  # MISC reason
    except ut.TimeExceededError:
        wl_log.critical("Closing streams timed out!")
    except Exception:
        wl_log.debug("Exception closing stream")
    finally:
        ut.cancel_timeout()

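# close_all_streams() guards the loop with ut.timeout()/ut.cancel_timeout().
# A minimal sketch of such a pair, assuming a SIGALRM-based implementation
# (Unix only); the project's actual util helpers may differ.
import signal

class TimeExceededError(Exception):
    pass

def _raise_time_exceeded(signum, frame):
    raise TimeExceededError("Timed out!")

def timeout(duration):
    """Deliver SIGALRM after `duration` seconds, raising TimeExceededError."""
    signal.signal(signal.SIGALRM, _raise_time_exceeded)
    signal.alarm(duration)  # start the countdown

def cancel_timeout():
    """Disable the pending alarm set by timeout()."""
    signal.alarm(0)
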
def parse_arguments():
    # Read configuration file
    config = ConfigParser.RawConfigParser()
    config.read(cm.CONFIG_FILE)

    # Parse arguments
    parser = argparse.ArgumentParser(
        description='Crawl a list of URLs in multiple batches.')

    # List of urls to be crawled
    parser.add_argument('-u', '--urls', required=True,
                        help='Path to the file that contains the list of URLs to crawl,'
                             ' or a comma-separated list of URLs.',
                        default=cm.LOCALIZED_DATASET)
    parser.add_argument('-t', '--type',
                        choices=cm.CRAWLER_TYPES,
                        help="Crawler type to use for this crawl.",
                        default='Base')
    parser.add_argument('-o', '--output',
                        help='Directory to dump the results (default=./results).',
                        default=cm.CRAWL_DIR)
    parser.add_argument('-i', '--crawl-id',
                        help='String used as crawl ID (default=DATE).',
                        default=None)
    parser.add_argument('-e', '--addons_dir',
                        help='Directory with the add-ons to be installed (default=None).',
                        default=None)
    parser.add_argument('-c', '--config',
                        help="Crawler tor driver and controller configurations.",
                        choices=config.sections(),
                        default="default")
    parser.add_argument('-b', '--tbb-path',
                        help="Path to the Tor Browser Bundle directory.",
                        default=cm.TBB_DIR)
    parser.add_argument('-f', '--tor-binary-path',
                        help="Path to the Tor binary.")
    parser.add_argument('-g', '--tor-data-path',
                        help="Path to the Tor data directory.")
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity',
                        default=False)
    parser.add_argument('-r', '--recover-file',
                        help="File with checkpoint to recover from.",
                        default=None)

    # Crawler features
    parser.add_argument('-x', '--virtual-display',
                        help='Dimensions of the virtual display, eg 1200x800',
                        default='')
    parser.add_argument('-s', '--screenshots', action='store_true',
                        help='Capture page screenshots',
                        default=False)
    parser.add_argument('-d', '--device',
                        help='Interface to sniff the network traffic',
                        choices=cm.IFACES,
                        default='eth0')

    # Limit crawl
    parser.add_argument('--start', type=int,
                        help='Select URLs from this line number: (default: 1).',
                        default=1)
    parser.add_argument('--stop', type=int,
                        help='Select URLs after this line number: (default: EOF).',
                        default=maxsize)

    # Parse arguments
    args = parser.parse_args()

    # Set verbose level
    wl_log.setLevel(DEBUG if args.verbose else INFO)
    del args.verbose

    # Set crawl ID
    if args.crawl_id:
        cm.set_crawl_id(args.crawl_id)
    del args.crawl_id

    # Change results dir if output
    cm.CRAWL_DIR = abspath(args.output)
    cm.LOGS_DIR = join(cm.CRAWL_DIR, 'logs')
    cm.CRAWL_LOG_FILENAME = join(cm.LOGS_DIR, 'crawl.log')
    cm.TOR_LOG_FILENAME = join(cm.LOGS_DIR, 'tor.log')
    if args.recover_file is not None:
        if isfile(cm.CRAWL_LOG_FILENAME):
            move(cm.CRAWL_LOG_FILENAME,
                 cm.CRAWL_LOG_FILENAME + '.' + cm.CRAWL_ID)
        if isfile(cm.TOR_LOG_FILENAME):
            move(cm.TOR_LOG_FILENAME,
                 cm.TOR_LOG_FILENAME + '.' + cm.CRAWL_ID)
    del args.output

    # Set local IP
    addresses = ifaddresses(args.device)
    ips = addresses.setdefault(AF_INET, [{'addr': 'No IP'}])
    cm.LOCAL_IP = ips[0]['addr']

    wl_log.debug("Command line parameters: %s" % argv)
    return args, config

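# Illustrative invocation of the crawler with the options parsed above
# (script and file names are hypothetical):
#   python main.py -u urls.txt -t Base -c default -d eth0 -s -x 1200x800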