try: try: first_party, rank, url = sites[i] except ValueError: continue cs = CommandSequence.CommandSequence(url, site_rank=rank, first_party=first_party, reset=True) cs.get(sleep=10, timeout=120) manager.execute_command_sequence(cs) with open(os.path.expanduser('~/.openwpm/current_site_index'), 'w') as f: f.write(str(i)) except CommandExecutionError: with open(os.path.expanduser('~/.openwpm/reboot'), 'w') as f: f.write(str(1)) break print "CLOSING TaskManager after batch" manager.close() crawl_utils.clear_tmp_folder() # Remove index file if we are done if current_index >= TOTAL_NUM_SITES: os.remove(os.path.expanduser('~/.openwpm/current_site_index')) with open(os.path.expanduser('~/.openwpm/crawl_done'), 'w') as f: f.write(str(1)) print "Total time: " + str(time.time() - start_time)
# Crawl one batch of sites (indices start_index .. end_index - 1, capped at
# TOTAL_NUM_SITES) and record progress on disk after every site.
#
# Marker files, all under ~/.openwpm:
#   current_site_index - index of the last site whose command sequence was
#                        executed without a CommandExecutionError
#   stop               - written when a CommandExecutionError aborts the batch
#   crawl_done         - written once the loop index has reached
#                        TOTAL_NUM_SITES
index_file = os.path.expanduser('~/.openwpm/current_site_index')
stop_file = os.path.expanduser('~/.openwpm/stop')
done_file = os.path.expanduser('~/.openwpm/crawl_done')

manager = TaskManager.TaskManager(manager_params, browser_params)
current_index = 0
try:
    for i in range(start_index, end_index):
        current_index = i
        # end_index may overshoot the site list on the final batch.
        if current_index >= TOTAL_NUM_SITES:
            break
        try:
            command_sequence = CommandSequence.CommandSequence(
                sites[i], reset=True)
            # NOTE(review): `get` is queued twice with identical arguments.
            # Kept as in the original; confirm whether visiting each site
            # twice is intentional.
            command_sequence.get(sleep=10, timeout=60)
            command_sequence.get(sleep=10, timeout=60)
            manager.execute_command_sequence(command_sequence)
            # Persist progress only after the sequence was handed off
            # without error.
            with open(index_file, 'w') as f:
                f.write(str(i))
        except CommandExecutionError:
            # Leave a marker on disk and abandon the rest of this batch.
            with open(stop_file, 'w') as f:
                f.write(str(1))
            break
finally:
    # Shut down and clean up after the batch. Running this in `finally`
    # ensures the manager and server are also released when an unexpected
    # exception escapes the loop; the exception still propagates afterwards.
    manager.close()
    server.stop()
    cu.clear_tmp_folder()

# Remove index file if we are done
if current_index >= TOTAL_NUM_SITES:
    # The index file is absent if no site was ever recorded (e.g. an empty
    # site list); a missing file must not prevent writing the done marker.
    if os.path.isfile(index_file):
        os.remove(index_file)
    with open(done_file, 'w') as f:
        f.write(str(1))