def filter_pcap(pcap_path, iplist):
    """Filter a capture in place, keeping TCP data packets to or from ``iplist``.

    The capture at ``pcap_path`` is first copied to ``pcap_path + ".original"``
    and read back from that copy. A packet is kept only when it contains a
    TCP layer, its TCP payload is non-empty, and either its source or its
    destination address is a member of ``iplist``. The kept packets
    overwrite ``pcap_path``; afterwards the temporary copy is removed and
    the filtered file is made world-accessible (mode 777).

    :param pcap_path: path of the capture file to filter (overwritten).
    :param iplist: container of addresses; only ``in`` membership is used,
        so a ``set`` gives the cheapest lookups.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import os

    ack_num = 0
    pkt_num = 0
    pcap_filtered = []
    orig_pcap = pcap_path + ".original"
    copyfile(pcap_path, orig_pcap)

    with PcapReader(orig_pcap) as preader:
        for p in preader:
            pkt_num += 1
            if 'TCP' not in p:
                continue
            # NOTE(review): assumes the first payload of the frame is the
            # IP layer (e.g. Ethernet -> IP -> TCP) -- confirm for other
            # link types.
            ip = p.payload
            if len(ip.payload.payload) == 0:
                # Empty TCP payload: counted as a bare ACK and dropped.
                ack_num += 1
                continue
            if ip.dst in iplist or ip.src in iplist:
                pcap_filtered.append(p)

    wrpcap(pcap_path, pcap_filtered)
    wl_log.debug("Filter out %d/%d ACK packets." % (ack_num, pkt_num))

    # Use os-level calls instead of building shell command strings: paths
    # containing spaces or shell metacharacters previously broke (or were
    # interpreted by) the ``rm`` / ``chmod`` command lines.
    # Both steps stay best-effort, as the ignored exit codes were before.
    try:
        os.remove(orig_pcap)
    except OSError as e:
        wl_log.warning("Cannot delete raw pcap %s: %s", orig_pcap, e)
    try:
        os.chmod(pcap_path, 0o777)
    except OSError as e:
        wl_log.warning("Cannot change mode of %s: %s", pcap_path, e)
    wl_log.debug("Delete raw pcap and change priviledge of pcap file.")
def start_capture(self, pcap_path=None, pcap_filter="", dumpcap_log=None):
    """Start capture. Configure sniffer if arguments are given.

    Launches ``dumpcap`` through the shell on ``self.device`` with
    ``self.pcap_filter`` as capture filter and ``self.pcap_file`` as output,
    then polls ``self.is_dumpcap_running()`` every 0.1 s until the process
    is seen or ``DUMPCAP_START_TIMEOUT`` seconds have been spent waiting.

    :param pcap_path: if truthy, passed to ``self.set_pcap_path`` first.
    :param pcap_filter: if truthy, passed to ``self.set_capture_filter``
        first.
    :param dumpcap_log: optional path; when given, dumpcap's stdout and
        stderr are written to this file, otherwise they are piped.
    :raises DumpcapTimeoutError: if dumpcap is not running once the
        start-up budget is used up.
    """
    if pcap_filter:
        self.set_capture_filter(pcap_filter)
    if pcap_path:
        self.set_pcap_path(pcap_path)

    prefix = ""
    command = '{}dumpcap -P -a duration:{} -a filesize:{} -i {} -s 0 -f \'{}\' -w {}'\
        .format(prefix, cm.HARD_VISIT_TIMEOUT, cm.MAX_DUMP_SIZE,
                self.device, self.pcap_filter, self.pcap_file)
    wl_log.info(command)

    if dumpcap_log:
        # The child process gets its own copy of the descriptor, so the
        # parent's handle can be closed right after spawning instead of
        # being leaked for the lifetime of the crawler.
        with open(dumpcap_log, "w+") as log_fi:
            self.p0 = subprocess.Popen(command, stdout=log_fi,
                                       stderr=log_fi, shell=True)
    else:
        # NOTE(review): nothing in this method reads these pipes; confirm
        # they are drained elsewhere, otherwise a chatty dumpcap can block.
        self.p0 = subprocess.Popen(command, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, shell=True)

    # Decide success by asking whether dumpcap is running, not by the sign
    # of the float counter: repeated ``-= 0.1`` may end at exactly 0 or at a
    # tiny positive value, which the previous ``timeout < 0`` check
    # reported as a successful start.
    remaining = DUMPCAP_START_TIMEOUT  # in seconds
    while not self.is_dumpcap_running():
        if remaining <= 0:
            raise DumpcapTimeoutError()
        time.sleep(0.1)
        remaining -= 0.1

    wl_log.debug("dumpcap started in %s seconds" %
                 (DUMPCAP_START_TIMEOUT - remaining))
    self.is_recording = True
def post_visit(self):
    """Filter the job's pcap down to traffic involving a guard IP.

    Collects the addresses returned by
    ``self.controller.get_all_guard_ips()`` into a set and hands it to
    ``ut.filter_pcap`` together with ``self.job.pcap_file``. Any exception
    raised while filtering is logged and not propagated.
    """
    guards = set(self.controller.get_all_guard_ips())
    wl_log.debug("Found %s guards in the consensus.", len(guards))
    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guards)
    except Exception as err:
        # Best effort: a failed filter must not abort the crawl, so only
        # report it and point at the file to inspect.
        wl_log.error("ERROR: filtering pcap file: %s.", err)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def post_visit(self):
    """Filter the job's pcap down to traffic involving a guard or bridge IP.

    The address set is the union of
    ``self.controller.get_all_guard_ips()`` and the hard-coded
    ``cm.My_Bridge_Ips`` (used when we set up our own bridges). It is
    passed to ``ut.filter_pcap`` with ``self.job.pcap_file``. Any exception
    raised while filtering is logged and not propagated.
    """
    guards = set(self.controller.get_all_guard_ips())
    # Hard-coded bridge IPs, used when we set up our own bridges.
    guards |= set(cm.My_Bridge_Ips)
    wl_log.debug("Found %s guards in the consensus.", len(guards))
    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guards)
    except Exception as err:
        # Best effort: a failed filter must not abort the crawl, so only
        # report it and point at the file to inspect.
        wl_log.error("ERROR: filtering pcap file: %s.", err)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def _do_visit(self):
    """Load the job's video URL and capture traffic until playback ends.

    Runs inside a ``Sniffer`` context writing to ``self.job.pcap_file``.
    After loading ``self.job.url`` the player state is polled about once
    per second via ``getPlayerState()`` on the ``movie_player`` element
    until it reports 0 (ended). While polling, the method

    * clicks any "skip ad" buttons it finds while the state is -1,
    * calls ``playVideo()`` while the state is -1 or 2,
    * logs a progress line (and, when ``self.screenshots`` is set, takes a
      screenshot) whenever the state changes or more than
      ``cm.SCREENSHOT_INTERVAL`` seconds passed since the last report.

    The whole visit is bounded by ``ut.timeout(cm.HARD_VISIT_TIMEOUT)``.
    Timeouts and any other exception are logged, not propagated.
    """
    # Labels keyed by the numeric player state. The previous positional
    # list labelled state 5 as 'unstarted' (its sixth entry) and raised on
    # any unexpected value; an explicit mapping avoids both.
    status_names = {
        -1: 'unstarted',
        0: 'ended',
        1: 'played',
        2: 'paused',
        3: 'buffered',
        5: 'queued',
    }
    js = "return document.getElementById('movie_player').getPlayerState()"

    with Sniffer(path=self.job.pcap_file, filter=cm.DEFAULT_FILTER,
                 device=self.device, dumpcap_log=self.job.pcap_log):
        sleep(1)  # make sure dumpcap is running
        try:
            screenshot_count = 0
            with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                # begin loading page
                self.driver.get(self.job.url)

                # take first screenshot
                if self.screenshots:
                    screenshot_count = self._save_screenshot(screenshot_count)

                # check video player status
                player_status = self.driver.execute_script(js)

                # continue visit capture until the video has fully played
                ts = time()
                while player_status != 0:
                    # attempt to simulate the user skipping an ad
                    if player_status == -1:
                        try:
                            skipAds = self.driver.find_elements(
                                By.XPATH,
                                "//button[@class=\"ytp-ad-skip-button ytp-button\"]"
                            )
                            wl_log.info(len(skipAds))
                            for skipAd in skipAds:
                                skipAd.click()
                        except WebDriverException as e:
                            # Skipping is best effort, but leave a trace
                            # instead of silently discarding the failure.
                            wl_log.debug("Cannot skip ad: %s", e)

                    # unpause video if state is unstarted or is for some
                    # reason paused
                    if player_status == -1 or player_status == 2:
                        self.driver.execute_script(
                            "return document.getElementById('movie_player').playVideo()"
                        )

                    # busy loop delay
                    sleep(1)

                    # check video state again
                    new_ps = self.driver.execute_script(js)

                    # print progress updates every time the video state
                    # changes or on the screenshot interval
                    ts_new = time()
                    if player_status != new_ps or ts_new - ts > cm.SCREENSHOT_INTERVAL:
                        wl_log.debug(
                            'youtube status: {} for {:.2f} seconds'.format(
                                status_names.get(player_status, 'unknown'),
                                ts_new - ts))
                        ts = ts_new
                        # take periodic screenshots
                        if self.screenshots:
                            screenshot_count = self._save_screenshot(
                                screenshot_count)
                    player_status = new_ps
        except (cm.HardTimeoutException, TimeoutException):
            wl_log.error("Visit to %s reached hard timeout!", self.job.url)
        except Exception as exc:
            wl_log.error("Unknown exception: %s", exc)


def _save_screenshot(self, index):
    """Try to save screenshot number ``index``; return the next index to use.

    The index only advances when the driver call does not raise, so a
    failed attempt is retried under the same file name next time.
    """
    try:
        self.driver.get_screenshot_as_file(self.job.png_file(index))
    except WebDriverException:
        wl_log.error("Cannot get screenshot.")
        return index
    return index + 1
def parse_arguments():
    """Parse the crawler's command line and apply global settings.

    Reads ``cm.CONFIG_FILE`` first, because its section names are the valid
    choices for ``--config``. After parsing, three options are applied as
    side effects and removed from the returned namespace:

    * ``--verbose`` sets the level of ``wl_log`` (DEBUG or INFO),
    * ``--output`` overwrites ``cm.CRAWL_DIR``,
    * ``--timeout`` (minutes) overwrites ``cm.HARD_VISIT_TIMEOUT`` (seconds).

    :returns: tuple ``(args, config)`` of the remaining parsed arguments
        and the loaded ``RawConfigParser``.
    """
    # Read configuration file
    config = ConfigParser.RawConfigParser()
    config.read(cm.CONFIG_FILE)

    # Parse arguments
    parser = argparse.ArgumentParser(
        description='Crawl a list of youtube URLs in multiple batches.')

    # List of urls to be crawled.
    # Not marked ``required``: combined with ``required=True`` the default
    # could never take effect, so omitting the option now falls back to
    # ``cm.VIDEO_LIST`` as the default intends.
    parser.add_argument('-u', '--url-file',
                        help='Path to the file that contains the list of video URLs to crawl.',
                        default=cm.VIDEO_LIST)
    parser.add_argument('-o', '--output',
                        help='Directory to dump the results (default=./results).',
                        default=cm.CRAWL_DIR)
    parser.add_argument('-c', '--config',
                        help="Crawler tor driver and controller configurations.",
                        choices=config.sections(),
                        default="default")
    parser.add_argument('-b', '--tbb-path',
                        help="Path to the Tor Browser Bundle directory.",
                        default=cm.TBB_DIR)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity',
                        default=False)
    parser.add_argument('-d', '--device', type=str, default='eth0',
                        help='Device interface on which to capture traffic.')
    parser.add_argument('--timeout', type=int, default=10,
                        help='Hard timeout (minutes) before video capture is interrupted.')

    # Crawler features
    parser.add_argument('-x', '--virtual-display',
                        help='Dimensions of the virtual display, eg 1200x800',
                        default=None)
    parser.add_argument('-s', '--screenshots', action='store_true',
                        help='Capture page screenshots',
                        default=False)

    # Limit crawl
    parser.add_argument('--start', type=int,
                        help='Select URLs starting with this line number: (default: 1).',
                        default=1)
    parser.add_argument('--stop', type=int,
                        help='Select URLs after this line number: (default: EOF).',
                        default=maxsize)

    # Parse arguments
    args = parser.parse_args()

    # Set verbose level
    wl_log.setLevel(DEBUG if args.verbose else INFO)
    del args.verbose

    # Change results dir if output
    cm.CRAWL_DIR = args.output
    del args.output

    # Change video load timeout (minutes on the command line, seconds
    # internally)
    cm.HARD_VISIT_TIMEOUT = args.timeout * 60
    del args.timeout

    # Lazy logging arguments: the message is only formatted when DEBUG is
    # enabled.
    wl_log.debug("Command line parameters: %s", argv)
    return args, config