class Warrior(object):
    '''The warrior god object.

    Holds the warrior configuration values, talks to the warrior HQ,
    installs and updates projects through git, feeds the selected
    project's pipeline to the ``Runner`` and handles shutdown/reboot.
    State changes are published through the ``on_*`` events.
    '''

    def __init__(self, projects_dir, data_dir, warrior_hq_url,
                 real_shutdown=False, keep_data=False):
        '''Set up configuration values, events and periodic updaters.

        Args:
            projects_dir: Writable directory for the project checkouts;
                ``config.json`` is stored here as well.
            data_dir: Writable directory for the shared ``data``
                directory and the versioned project clones.
            warrior_hq_url: Base URL of the warrior HQ.
            real_shutdown: When true, stopping/rebooting also runs
                ``sudo shutdown``; otherwise only the IO loop is stopped.
            keep_data: Passed through to the ``Runner``.

        Raises:
            Exception: If ``projects_dir`` or ``data_dir`` is not
                writable.
        '''
        if not os.access(projects_dir, os.W_OK):
            raise Exception(
                "Couldn't write to projects directory: %s" % projects_dir)
        if not os.access(data_dir, os.W_OK):
            raise Exception("Couldn't write to data directory: %s" % data_dir)

        self.projects_dir = projects_dir
        self.data_dir = data_dir
        self.warrior_hq_url = warrior_hq_url
        self.real_shutdown = real_shutdown
        self.keep_data = keep_data

        # disable the password prompts
        # NOTE(review): the '******' values look masked; presumably a
        # harmless prompt helper program is intended here -- confirm.
        self.gitenv = dict(
            os.environ,
            GIT_ASKPASS='******',
            SSH_ASKPASS='******'
        )

        self.warrior_id = StringConfigValue(
            name="warrior_id",
            title="Warrior ID",
            description="The unique number of your warrior instance.",
            editable=False
        )
        self.selected_project_config_value = StringConfigValue(
            name="selected_project",
            title="Selected project",
            description="The project (to be continued when the warrior "
                        "restarts).",
            default="none",
            editable=False
        )
        self.downloader = StringConfigValue(
            name="downloader",
            title="Your nickname",
            description="We use your nickname to show your results on our "
                        "tracker. Letters and numbers only.",
            regex="^[-_a-zA-Z0-9]{3,30}$",
            advanced=False
        )
        self.concurrent_items = NumberConfigValue(
            name="concurrent_items",
            title="Concurrent items",
            description="How many items should the warrior download at a "
                        "time? (Max: 6)",
            min=1,
            max=6,
            default=2
        )
        self.http_username = StringConfigValue(
            name="http_username",
            title="HTTP username",
            description="Enter a username to protect the web interface, "
                        "or leave empty.",
            default=""
        )
        self.http_password = StringConfigValue(
            name="http_password",
            title="HTTP password",
            description="Enter a password to protect the web interface, "
                        "or leave empty.",
            default=""
        )

        self.config_manager = ConfigManager(os.path.join(projects_dir,
                                                         "config.json"))
        self.config_manager.add(self.warrior_id)
        self.config_manager.add(self.selected_project_config_value)
        self.config_manager.add(self.downloader)
        self.config_manager.add(self.concurrent_items)
        self.config_manager.add(self.http_username)
        self.config_manager.add(self.http_password)

        self.bandwidth_monitor = BandwidthMonitor("eth0")
        self.bandwidth_monitor.update()

        self.runner = Runner(concurrent_items=self.concurrent_items,
                             keep_data=self.keep_data)
        self.runner.on_finish += self.handle_runner_finish

        # Name and project object of the pipeline that is running now;
        # selected_project is the name the user (or HQ) asked for.
        self.current_project_name = None
        self.current_project = None

        self.selected_project = None

        self.projects = {}
        self.installed_projects = set()
        self.failed_projects = set()

        self.on_projects_loaded = Event()
        self.on_project_installing = Event()
        self.on_project_installed = Event()
        self.on_project_installation_failed = Event()
        self.on_project_refresh = Event()
        self.on_project_selected = Event()
        self.on_status = Event()
        self.on_broadcast_message_received = Event()

        self.http_client = AsyncHTTPClient()

        # False/None when idle, otherwise the name of the project that
        # is being installed.
        self.installing = False
        self.shut_down_flag = False
        self.reboot_flag = False

        io_loop = ioloop.IOLoop.instance()

        # PeriodicCallback expects plain callables; the wrappers start the
        # coroutine and call fut.result() so that an exception raised in
        # the coroutine is re-raised instead of being dropped.
        def update_warrior_callback():
            io_loop.add_future(
                self.update_warrior_hq(), lambda fut: fut.result()
            )

        def update_project_callback():
            io_loop.add_future(self.update_project(),
                               lambda fut: fut.result())

        # Intervals are in milliseconds: 10 and 30 minutes.
        self.hq_updater = ioloop.PeriodicCallback(update_warrior_callback,
                                                  10 * 60 * 1000)
        self.project_updater = ioloop.PeriodicCallback(
            update_project_callback, 30 * 60 * 1000)
        self.forced_reboot_timeout = None

        self.lat_lng = None
        self.find_lat_lng()

        self.install_output = None
        self.broadcast_message = None
        self.contacting_hq_failed = False

    def find_lat_lng(self):
        '''Look up the warrior location (currently a no-op).'''
        # The lookup request is commented out, so lat_lng stays None:
        # response = self.http_client.fetch("http://www.maxmind.com/app/mylocation", self.handle_lat_lng, user_agent="")
        pass

    def handle_lat_lng(self, response):
        '''Extract the latitude/longitude text from a lookup response.

        Stores the matched text in ``self.lat_lng``; leaves it untouched
        when the body is empty or the pattern is not found.
        '''
        body = response.body
        if not body:
            return
        if isinstance(body, bytes):
            # The pattern below is a text pattern; matching it against
            # bytes raises TypeError on Python 3.
            body = body.decode('utf-8', 'replace')

        m = re.search(r"geoip-demo-results-tbodyLatitude/Longitude</td>"
                      r"\s*<td[^>]*>\s*([-/.0-9]+)\s*</td>",
                      body)
        if m:
            self.lat_lng = m.group(1)

    def bandwidth_stats(self):
        '''Refresh the bandwidth monitor and return its current stats.'''
        self.bandwidth_monitor.update()
        return self.bandwidth_monitor.current_stats()

    def _hq_url(self, endpoint):
        '''Return the HQ URL for ``endpoint`` (e.g. ``api/update.json``).'''
        # URLs always use forward slashes; os.path.join would insert the
        # platform separator instead (a backslash on Windows).
        import posixpath
        return posixpath.join(self.warrior_hq_url, endpoint)

    @staticmethod
    def _deadline_timestamp(deadline):
        '''Convert a ``%Y-%m-%dT%H:%M:%SZ`` string to seconds since epoch.

        The trailing ``Z`` marks the time as UTC, so the conversion uses
        ``calendar.timegm`` rather than ``time.mktime`` (which would read
        the fields as local time).  The result is a float, as before.
        '''
        import calendar
        return float(calendar.timegm(
            time.strptime(deadline, "%Y-%m-%dT%H:%M:%SZ")))

    @gen.coroutine
    def update_warrior_hq(self):
        '''Register with the HQ, or report status and refresh projects.

        Without a warrior ID this posts to ``api/register.json`` and
        stores the returned ID.  With an ID it posts to
        ``api/update.json``; the reply may trigger a reboot for a newer
        seesaw version, otherwise it replaces ``self.projects``,
        (re)selects a project and fires ``on_projects_loaded`` and
        ``on_broadcast_message_received``.
        '''
        # NOTE(review): depending on the Tornado version, fetch() may
        # raise for non-2xx replies instead of returning them, in which
        # case the "HTTP error" branches below are not reached -- confirm.
        logger.debug('Update warrior hq.')

        if realize(self.warrior_id) is None:
            headers = {"Content-Type": "application/json"}
            user_agent = "ArchiveTeam Warrior/%s" % seesaw.__version__
            body = json.dumps(
                {"warrior": {"version": seesaw.__version__}}
            )

            response = yield self.http_client.fetch(
                self._hq_url("api/register.json"),
                method="POST",
                headers=headers,
                user_agent=user_agent,
                body=body
            )

            if response.code == 200:
                data = json.loads(response.body.decode('utf-8'))
                logger.info("Received Warrior ID '%s'." % data["warrior_id"])
                self.config_manager.set_value("warrior_id",
                                              data["warrior_id"])
                self.fire_status()
            else:
                logger.error("HTTP error %s" % (response.code))
                self.fire_status()
                return
        else:
            logger.debug("Warrior ID '%s'." % realize(self.warrior_id))

        headers = {"Content-Type": "application/json"}
        user_agent = "ArchiveTeam Warrior/%s %s" % (seesaw.__version__,
                                                    seesaw.runner_type)
        body = json.dumps({
            "warrior": {
                "warrior_id": realize(self.warrior_id),
                "lat_lng": self.lat_lng,
                "downloader": realize(self.downloader),
                "selected_project": realize(
                    self.selected_project_config_value)
            }})

        response = yield self.http_client.fetch(
            self._hq_url("api/update.json"),
            method="POST",
            headers=headers,
            user_agent=user_agent,
            body=body
        )

        if response.code == 200:
            data = json.loads(response.body.decode('utf-8'))

            if StrictVersion(seesaw.__version__) < \
                    StrictVersion(data["warrior"]["seesaw_version"]):
                # time for an update
                logger.info("Reboot for Seesaw update.")
                self.reboot_gracefully()

                # schedule a forced reboot after two days
                self.schedule_forced_reboot()
                return

            projects_list = data["projects"]
            self.projects = OrderedDict(
                [(project["name"], project) for project in projects_list])
            for project_data in self.projects.values():
                if "deadline" in project_data:
                    project_data["deadline_int"] = self._deadline_timestamp(
                        project_data["deadline"])

            previous_project_choice = realize(
                self.selected_project_config_value)

            if self.selected_project and \
                    self.selected_project not in self.projects:
                # the running project is no longer offered
                yield self.select_project(None)
            elif previous_project_choice in self.projects:
                # select previous project
                yield self.select_project(previous_project_choice)
            elif previous_project_choice == "auto":
                # ArchiveTeam's choice
                if "auto_project" in data:
                    yield self.select_project(data["auto_project"])
                else:
                    yield self.select_project(None)

            self.contacting_hq_failed = False
            self.on_projects_loaded(self, self.projects)

            self.broadcast_message = data.get('broadcast_message')
            self.on_broadcast_message_received(
                self, data.get('broadcast_message'))
        else:
            logger.error("HTTP error %s" % (response.code))
            self.contacting_hq_failed = True

            # We don't set projects to {} because it causes the
            # "Stop Current" project button to disappear
            for name in tuple(self.projects):
                if name != self.selected_project:
                    del self.projects[name]

            self.on_projects_loaded(self, self.projects)

    def _fail_install(self, project_name, project, log_message):
        '''Log and publish an installation failure and mark it as failed.'''
        output = "".join(self.install_output)
        logger.error(log_message, output)
        self.on_project_installation_failed(self, project, output)
        self.installing = None
        self.failed_projects.add(project_name)

    @gen.coroutine
    def install_project(self, project_name):
        '''Clone or pull the project and run its custom installer.

        Returns (through the coroutine's future) ``True`` when the project
        was installed and ``False`` when it is unknown, another install is
        in progress, or git / ``warrior-install.sh`` exited non-zero.
        '''
        logger.debug('Install project %s', project_name)
        self.installed_projects.discard(project_name)

        if project_name not in self.projects or self.installing:
            raise gen.Return(False)

        self.installing = project_name
        self.install_output = []

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.on_project_installing(self, project)

        # A checkout left behind by a failed install is thrown away so
        # that the next attempt starts from a fresh clone.
        if project_name in self.failed_projects:
            if os.path.exists(project_path):
                shutil.rmtree(project_path)
            self.failed_projects.discard(project_name)

        if os.path.exists(project_path):
            # Point the existing checkout at the repository the HQ lists
            # now, then pull.
            subprocess.Popen(
                args=["git", "config", "remote.origin.url",
                      project["repository"]],
                cwd=project_path
            ).communicate()

            logger.debug('git pull from %s', project["repository"])
            p = AsyncPopen2(
                args=["git", "pull"],
                cwd=project_path,
                env=self.gitenv
            )
        else:
            logger.debug('git clone')
            p = AsyncPopen2(
                args=["git", "clone", project["repository"], project_path],
                env=self.gitenv
            )
        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            self.install_output.append("\ngit returned %d\n" % result)
            self._fail_install(project_name, project,
                               "Project failed to install: %s")
            raise gen.Return(False)

        logger.debug("git operation: %s", "".join(self.install_output))

        project_install_file = os.path.join(project_path,
                                            "warrior-install.sh")

        if os.path.exists(project_install_file):
            p = AsyncPopen2(
                args=[project_install_file],
                cwd=project_path
            )
            p.on_output += self.collect_install_output
            p.on_end += yield gen.Callback("installend")
            p.run()
            result = yield gen.Wait("installend")

            if result != 0:
                self.install_output.append(
                    "\nCustom installer returned %d\n" % result)
                self._fail_install(project_name, project,
                                   "Custom installer failed to install: %s")
                raise gen.Return(False)

        # Start from an empty shared data directory ...
        data_dir = os.path.join(self.data_dir, "data")
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        # ... and make <project>/data a symlink to it.
        project_data_dir = os.path.join(project_path, "data")
        if os.path.islink(project_data_dir):
            os.remove(project_data_dir)
        elif os.path.isdir(project_data_dir):
            shutil.rmtree(project_data_dir)
        os.symlink(data_dir, project_data_dir)

        self.installed_projects.add(project_name)
        logger.debug('Install complete %s', "".join(self.install_output))
        self.on_project_installed(self, project,
                                  "".join(self.install_output))

        self.installing = None
        raise gen.Return(True)

    @gen.coroutine
    def update_project(self):
        '''Reinstall and restart the selected project if it has updates.'''
        logger.debug('Update project.')
        if self.selected_project and \
                (yield self.check_project_has_update(self.selected_project)):
            # restart project
            yield self.start_selected_project(reinstall=True)

    @gen.coroutine
    def check_project_has_update(self, project_name):
        '''Return whether the project checkout needs a (re)install.

        ``True`` when the checkout is missing, ``git fetch`` fails, or
        ``origin/HEAD`` has commits that ``HEAD`` lacks; ``False``
        otherwise, including when the project is unknown.
        '''
        logger.debug('Check project has update %s', project_name)

        if project_name not in self.projects:
            raise gen.Return(False)

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.install_output = []

        if not os.path.exists(project_path):
            logger.debug("Project doesn't exist.")
            raise gen.Return(True)

        subprocess.Popen(
            args=["git", "config", "remote.origin.url",
                  project["repository"]],
            cwd=project_path
        ).communicate()

        logger.debug('git fetch')
        p = AsyncPopen2(
            args=["git", "fetch"],
            cwd=project_path,
            env=self.gitenv
        )
        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            logger.debug('Got return code %s', result)
            raise gen.Return(True)

        # Any output means there are commits on origin/HEAD that are not
        # in the local HEAD.
        output = subprocess.Popen(
            args=["git", "rev-list", "HEAD..origin/HEAD"],
            cwd=project_path,
            stdout=subprocess.PIPE
        ).communicate()[0]

        if output.strip():
            logger.debug('True')
            raise gen.Return(True)
        else:
            logger.debug('False')
            raise gen.Return(False)

    def collect_install_output(self, data):
        '''Echo subprocess output to stdout and keep it for reporting.'''
        if isinstance(data, seesaw.six.binary_type):
            text = data.decode('ascii', 'replace')
        else:
            text = data

        sys.stdout.write(text)
        # Drop control characters (tab, LF and CR are kept) before the
        # text is stored.
        text = re.sub("[\x00-\x08\x0b\x0c]", "", text)
        self.install_output.append(text)

    @gen.coroutine
    def select_project(self, project_name):
        '''Select ``project_name`` and (re)start it when it changed.

        ``"auto"`` defers the choice to the HQ via ``update_warrior_hq``;
        a name that is not in ``self.projects`` is treated as ``None``
        (no project).
        '''
        logger.debug('Select project %s', project_name)
        if project_name == "auto":
            yield self.update_warrior_hq()
            return

        if project_name not in self.projects:
            logger.debug("Project doesn't exist.")
            project_name = None

        if project_name != self.selected_project:
            # restart
            self.selected_project = project_name
            self.on_project_selected(self, project_name)
            yield self.start_selected_project()

    def clone_project(self, project_name, project_path):
        '''Clone the checkout into a per-commit directory and return it.

        The directory is ``<data_dir>/projects/<name>-<short hash>``; an
        existing directory is reused without cloning again.
        '''
        logger.debug('Clone project %s %s', project_name, project_path)
        version_string = subprocess.Popen(
            args=["git", "log", "-1", "--pretty=%h"],
            cwd=project_path,
            stdout=subprocess.PIPE
        ).communicate()[0].strip().decode('ascii')

        logger.debug('Cloning version %s', version_string)
        versioned_root = os.path.join(self.data_dir, "projects")
        project_versioned_path = os.path.join(
            versioned_root, "%s-%s" % (project_name, version_string))
        if not os.path.exists(project_versioned_path):
            if not os.path.exists(versioned_root):
                os.makedirs(versioned_root)

            subprocess.Popen(
                args=["git", "clone", project_path, project_versioned_path],
                env=self.gitenv
            ).communicate()

        return project_versioned_path

    def load_pipeline(self, pipeline_path, context):
        '''Execute a pipeline file and return what it defines.

        The file runs with its own directory as working directory and
        with ``context`` as its namespace; it must define ``project`` and
        ``pipeline``.

        Returns:
            ``(project, pipeline, config_values)`` where ``config_values``
            are the values collected while the file was executing.
        '''
        logger.debug('Load pipeline %s', pipeline_path)
        dirname = os.path.dirname(pipeline_path) or "."

        with open(pipeline_path) as f:
            pipeline_str = f.read()

        ConfigValue.start_collecting()

        # One dict serves as both globals and locals of the executed code.
        namespace = context
        curdir = os.getcwd()
        try:
            os.chdir(dirname)
            # NOTE: this runs the project's pipeline.py with full
            # privileges; only trusted repositories should be configured.
            exec(pipeline_str, namespace, namespace)
        finally:
            os.chdir(curdir)

        config_values = ConfigValue.stop_collecting()

        project = namespace["project"]
        pipeline = namespace["pipeline"]
        pipeline.project = project
        return (project, pipeline, config_values)

    @gen.coroutine
    def start_selected_project(self, reinstall=False):
        '''Install (if needed) and start the selected project's pipeline.

        Args:
            reinstall: Force ``install_project`` even when the project is
                already installed.

        When the selected project is not in ``self.projects`` the current
        pipeline is cleared instead.  A failed install is logged and
        leaves the previous state untouched.
        '''
        logger.debug(
            'Start selected project %s (reinstall=%s)',
            self.selected_project, reinstall
        )
        project_name = self.selected_project

        if project_name not in self.projects:
            # project_name not in self.projects,
            # stop the current project (if there is one)
            logger.debug("Project does not exist.")
            self.runner.set_current_pipeline(None)
            self.fire_status()
            return

        # install or update project if necessary
        if project_name not in self.installed_projects or \
                reinstall or \
                (yield self.check_project_has_update(project_name)):
            result = yield self.install_project(project_name)
            if not result:
                logger.warning(
                    "Project %s did not install correctly and "
                    "we're ignoring this problem.",
                    project_name
                )
                return

        # remove the configuration variables from the previous project
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        # the path with the project code
        # (this is the most recent code from the repository)
        project_path = os.path.join(self.projects_dir, project_name)

        # clone the project code to a versioned directory
        # where the pipeline is actually run
        project_versioned_path = self.clone_project(project_name,
                                                    project_path)

        # load the pipeline from the versioned directory
        pipeline_path = os.path.join(project_versioned_path, "pipeline.py")
        (project, pipeline, config_values) = self.load_pipeline(
            pipeline_path, {"downloader": self.downloader})

        # add the configuration values to the config manager
        for config_value in config_values:
            self.config_manager.add(config_value)
        project.config_values = config_values

        # start the pipeline
        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.set_current_pipeline(pipeline)

        self.current_project_name = project_name
        self.current_project = project

        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.start()

    def handle_runner_finish(self, runner):
        '''Clean up after the runner stopped; finish a pending shutdown.'''
        logger.info("Runner has finished.")

        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        self.current_project_name = None
        self.current_project = None

        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        if self.shut_down_flag or self.reboot_flag:
            ioloop.IOLoop.instance().stop()

            if self.real_shutdown:
                if self.shut_down_flag:
                    os.system("sudo shutdown -h now")
                elif self.reboot_flag:
                    os.system("sudo shutdown -r now")

    def start(self):
        '''Start the updaters, contact the HQ and run the IO loop.'''
        io_loop = ioloop.IOLoop.instance()

        if self.real_shutdown:
            # schedule a reboot
            io_loop.add_timeout(datetime.timedelta(days=7),
                                self.max_age_reached)

        self.hq_updater.start()
        self.project_updater.start()
        io_loop.add_future(self.update_warrior_hq(),
                           lambda fut: fut.result())
        io_loop.start()

    def max_age_reached(self):
        '''Timeout handler: reboot after seven days of uptime.'''
        if self.real_shutdown:
            # time for a sanity reboot
            logger.info("Running for more than 7 days. "
                        "Time to schedule a reboot.")
            self.reboot_gracefully()

            # schedule a forced reboot after two days
            self.schedule_forced_reboot()

    def reboot_gracefully(self):
        '''Reboot once the runner is idle (immediately if it already is).'''
        self.shut_down_flag = False
        self.reboot_flag = True
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the reboot later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                os.system("sudo shutdown -r now")

    def schedule_forced_reboot(self):
        '''Schedule ``forced_reboot`` in two days, at most once.'''
        if self.real_shutdown and not self.forced_reboot_timeout:
            self.forced_reboot_timeout = ioloop.IOLoop.instance().add_timeout(
                datetime.timedelta(days=2), self.forced_reboot)

    def forced_reboot(self):
        '''Reboot right away without waiting for the runner.'''
        logger.info("Stopping immediately...")
        if self.real_shutdown:
            os.system("sudo shutdown -r now")

    def stop_gracefully(self):
        '''Shut down once the runner is idle (immediately if it already is).'''
        self.shut_down_flag = True
        self.reboot_flag = False
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the shutdown later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                os.system("sudo shutdown -h now")

    def forced_stop(self):
        '''Stop the IO loop (and the machine) without waiting.'''
        ioloop.IOLoop.instance().stop()
        if self.real_shutdown:
            os.system("sudo shutdown -h now")

    def keep_running(self):
        '''Cancel a pending shutdown/reboot and resume the project.'''
        self.shut_down_flag = False
        self.reboot_flag = False
        ioloop.IOLoop.instance().add_future(
            self.start_selected_project(),
            lambda fut: fut.result()
        )
        self.fire_status()

    class Status(object):
        '''Status names reported through ``on_status``.'''
        UNINITIALIZED = 'UNINITIALIZED'
        NO_PROJECT = "NO_PROJECT"
        INVALID_SETTINGS = "INVALID_SETTINGS"
        STOPPING_PROJECT = "STOPPING_PROJECT"
        RESTARTING_PROJECT = "RESTARTING_PROJECT"
        RUNNING_PROJECT = "RUNNING_PROJECT"
        SWITCHING_PROJECT = "SWITCHING_PROJECT"
        STARTING_PROJECT = "STARTING_PROJECT"
        SHUTTING_DOWN = "SHUTTING_DOWN"
        REBOOTING = "REBOOTING"

    def fire_status(self):
        '''Publish the current status through ``on_status``.'''
        self.on_status(self, self.warrior_status())

    def warrior_status(self):
        '''Return the ``Warrior.Status`` value for the current state.

        Checks run in priority order: shutdown, reboot, missing warrior
        ID, invalid settings, then the selected/current project state.
        '''
        if self.shut_down_flag:
            return Warrior.Status.SHUTTING_DOWN
        elif self.reboot_flag:
            return Warrior.Status.REBOOTING
        elif realize(self.warrior_id) is None:
            return Warrior.Status.UNINITIALIZED
        elif not self.config_manager.all_valid():
            return Warrior.Status.INVALID_SETTINGS
        elif self.selected_project is None and \
                self.current_project_name is None:
            return Warrior.Status.NO_PROJECT
        elif self.selected_project:
            if self.selected_project == self.current_project_name:
                return Warrior.Status.RUNNING_PROJECT
            else:
                return Warrior.Status.STARTING_PROJECT
        else:
            return Warrior.Status.STOPPING_PROJECT
class Warrior(object):
    '''Callback-style (``gen.engine``) revision of the warrior object.

    Holds the warrior configuration values, talks to the warrior HQ,
    installs and updates projects through git, feeds the selected
    project's pipeline to the ``Runner`` and handles shutdown/reboot.
    Asynchronous methods report results through a ``callback`` argument.
    '''

    def __init__(self, projects_dir, data_dir, warrior_hq_url,
                 real_shutdown=False):
        '''Set up configuration values, events and periodic updaters.

        Args:
            projects_dir: Directory for the project checkouts;
                ``config.json`` is stored here as well.
            data_dir: Directory for the shared ``data`` directory and the
                versioned project clones.
            warrior_hq_url: Base URL of the warrior HQ.
            real_shutdown: When true, stopping/rebooting also runs
                ``sudo shutdown``; otherwise only the IO loop is stopped.
        '''
        self.projects_dir = projects_dir
        self.data_dir = data_dir
        self.warrior_hq_url = warrior_hq_url
        self.real_shutdown = real_shutdown

        # disable the password prompts
        # (dict(mapping, **extra) works on Python 2 and 3, unlike adding
        # the two items() results together)
        # NOTE(review): the '******' values look masked; presumably a
        # harmless prompt helper program is intended here -- confirm.
        self.gitenv = dict(
            os.environ,
            GIT_ASKPASS='******',
            SSH_ASKPASS='******'
        )

        self.warrior_id = StringConfigValue(
            name="warrior_id",
            title="Warrior ID",
            description="The unique number of your warrior instance.",
            editable=False
        )
        self.selected_project_config_value = StringConfigValue(
            name="selected_project",
            title="Selected project",
            description="The project (to be continued when the warrior restarts).",
            default="none",
            editable=False
        )
        self.downloader = StringConfigValue(
            name="downloader",
            title="Your nickname",
            description="We use your nickname to show your results on our tracker. Letters and numbers only.",
            regex="^[-_a-zA-Z0-9]{3,30}$"
        )
        self.concurrent_items = NumberConfigValue(
            name="concurrent_items",
            title="Concurrent items",
            description="How many items should the warrior download at a time? (Max: 6)",
            min=1,
            max=6,
            default=2
        )

        self.config_manager = ConfigManager(os.path.join(projects_dir,
                                                         "config.json"))
        self.config_manager.add(self.warrior_id)
        self.config_manager.add(self.selected_project_config_value)
        self.config_manager.add(self.downloader)
        self.config_manager.add(self.concurrent_items)

        self.bandwidth_monitor = BandwidthMonitor("eth0")
        self.bandwidth_monitor.update()

        self.runner = Runner(concurrent_items=self.concurrent_items)
        self.runner.on_finish += self.handle_runner_finish

        # Name and project object of the pipeline that is running now;
        # selected_project is the name the user (or HQ) asked for.
        self.current_project_name = None
        self.current_project = None

        self.selected_project = None

        self.projects = {}
        self.installed_projects = set()
        self.failed_projects = set()

        self.on_projects_loaded = Event()
        self.on_project_installing = Event()
        self.on_project_installed = Event()
        self.on_project_installation_failed = Event()
        self.on_project_refresh = Event()
        self.on_project_selected = Event()
        self.on_status = Event()

        self.http_client = AsyncHTTPClient()

        # False/None when idle, otherwise the name of the project that
        # is being installed.
        self.installing = False
        self.shut_down_flag = False
        self.reboot_flag = False

        # Intervals are in milliseconds: 10 and 60 minutes.
        self.hq_updater = ioloop.PeriodicCallback(self.update_warrior_hq,
                                                  10 * 60 * 1000)
        self.project_updater = ioloop.PeriodicCallback(self.update_project,
                                                       60 * 60 * 1000)
        self.forced_reboot_timeout = None

        self.lat_lng = None
        self.find_lat_lng()

    def find_lat_lng(self):
        '''Look up the warrior location (currently a no-op).'''
        # The lookup request is commented out, so lat_lng stays None:
        # response = self.http_client.fetch("http://www.maxmind.com/app/mylocation", self.handle_lat_lng, user_agent="")
        pass

    def handle_lat_lng(self, response):
        '''Store the latitude/longitude text found in ``response.body``.'''
        m = re.search(r"geoip-demo-results-tbodyLatitude/Longitude</td>\s*<td[^>]*>\s*([-/.0-9]+)\s*</td>",
                      response.body)
        if m:
            self.lat_lng = m.group(1)

    def bandwidth_stats(self):
        '''Refresh the bandwidth monitor and return its current stats.'''
        self.bandwidth_monitor.update()
        return self.bandwidth_monitor.current_stats()

    def _hq_url(self, endpoint):
        '''Return the HQ URL for ``endpoint`` (e.g. ``api/update.json``).'''
        # URLs always use forward slashes; os.path.join would insert the
        # platform separator instead (a backslash on Windows).
        import posixpath
        return posixpath.join(self.warrior_hq_url, endpoint)

    @staticmethod
    def _deadline_timestamp(deadline):
        '''Convert a ``%Y-%m-%dT%H:%M:%SZ`` string to seconds since epoch.

        The trailing ``Z`` marks the time as UTC, so the conversion uses
        ``calendar.timegm`` rather than ``time.mktime`` (which would read
        the fields as local time).  The result is a float, as before.
        '''
        import calendar
        return float(calendar.timegm(
            time.strptime(deadline, "%Y-%m-%dT%H:%M:%SZ")))

    @gen.engine
    def update_warrior_hq(self):
        '''Register with the HQ, or report status and refresh projects.

        Without a warrior ID this posts to ``api/register.json`` and
        stores the returned ID.  With an ID it posts to
        ``api/update.json``; the reply may trigger a reboot for a newer
        seesaw version, otherwise it replaces ``self.projects``,
        (re)selects a project and fires ``on_projects_loaded``.
        '''
        if realize(self.warrior_id) is None:
            response = yield gen.Task(
                self.http_client.fetch,
                self._hq_url("api/register.json"),
                method="POST",
                headers={"Content-Type": "application/json"},
                user_agent=("ArchiveTeam Warrior/%s" % seesaw.__version__),
                body=json.dumps({"warrior": {"version": seesaw.__version__}}))

            if response.code == 200:
                data = json.loads(response.body)
                print("Received Warrior ID '%s'." % data["warrior_id"])
                self.config_manager.set_value("warrior_id",
                                              data["warrior_id"])
            else:
                print("HTTP error %s" % (response.code))
                return
        else:
            print("Warrior ID '%s'." % realize(self.warrior_id))

        response = yield gen.Task(
            self.http_client.fetch,
            self._hq_url("api/update.json"),
            method="POST",
            headers={"Content-Type": "application/json"},
            user_agent=("ArchiveTeam Warrior/%s %s" % (seesaw.__version__,
                                                       seesaw.runner_type)),
            body=json.dumps({"warrior": {
                "warrior_id": realize(self.warrior_id),
                "lat_lng": self.lat_lng,
                "downloader": realize(self.downloader),
                "selected_project": realize(
                    self.selected_project_config_value)
            }}))

        if response.code == 200:
            data = json.loads(response.body)

            if StrictVersion(seesaw.__version__) < \
                    StrictVersion(data["warrior"]["seesaw_version"]):
                # time for an update
                print("Reboot for Seesaw update.")
                self.reboot_gracefully()

                # schedule a forced reboot after two days
                self.schedule_forced_reboot()
                return

            projects_list = data["projects"]
            self.projects = OrderedDict(
                [(project["name"], project) for project in projects_list])
            for project_data in self.projects.values():
                if "deadline" in project_data:
                    project_data["deadline_int"] = self._deadline_timestamp(
                        project_data["deadline"])

            previous_project_choice = realize(
                self.selected_project_config_value)

            if self.selected_project and \
                    self.selected_project not in self.projects:
                # the running project is no longer offered
                self.select_project(None)
            elif previous_project_choice in self.projects:
                # select previous project
                self.select_project(previous_project_choice)
            elif previous_project_choice == "auto":
                # ArchiveTeam's choice
                if "auto_project" in data:
                    self.select_project(data["auto_project"])
                else:
                    self.select_project(None)

            self.on_projects_loaded(self, self.projects)
        else:
            print("HTTP error %s" % (response.code))

    def _fail_install(self, project_name, project, callback):
        '''Publish an installation failure and answer ``callback(False)``.'''
        self.on_project_installation_failed(self, project,
                                            "".join(self.install_output))
        self.installing = None
        self.failed_projects.add(project_name)
        if callback:
            callback(False)

    @gen.engine
    def install_project(self, project_name, callback=None):
        '''Clone or pull the project and run its custom installer.

        Args:
            project_name: Key into ``self.projects``.
            callback: Optional callable receiving ``True`` on success and
                ``False`` when the project is unknown, another install is
                in progress, or git / ``warrior-install.sh`` exited
                non-zero.
        '''
        self.installed_projects.discard(project_name)

        if project_name not in self.projects or self.installing:
            # Always answer the caller: a gen.Task waiting on this
            # callback would otherwise never resume.
            if callback:
                callback(False)
            return

        self.installing = project_name
        self.install_output = []

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.on_project_installing(self, project)

        # A checkout left behind by a failed install is thrown away so
        # that the next attempt starts from a fresh clone.
        if project_name in self.failed_projects:
            if os.path.exists(project_path):
                shutil.rmtree(project_path)
            self.failed_projects.discard(project_name)

        if os.path.exists(project_path):
            # Point the existing checkout at the repository the HQ lists
            # now, then pull.
            subprocess.Popen(
                args=["git", "config", "remote.origin.url",
                      project["repository"]],
                cwd=project_path
            ).communicate()

            p = AsyncPopen(
                args=["git", "pull"],
                cwd=project_path,
                env=self.gitenv
            )
        else:
            p = AsyncPopen(
                args=["git", "clone", project["repository"], project_path],
                env=self.gitenv
            )
        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            self.install_output.append("\ngit returned %d\n" % result)
            self._fail_install(project_name, project, callback)
            return

        project_install_file = os.path.join(project_path,
                                            "warrior-install.sh")

        if os.path.exists(project_install_file):
            p = AsyncPopen(
                args=[project_install_file],
                cwd=project_path
            )
            p.on_output += self.collect_install_output
            p.on_end += yield gen.Callback("installend")
            p.run()
            result = yield gen.Wait("installend")

            if result != 0:
                self.install_output.append(
                    "\nCustom installer returned %d\n" % result)
                self._fail_install(project_name, project, callback)
                return

        # Start from an empty shared data directory ...
        data_dir = os.path.join(self.data_dir, "data")
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        # ... and make <project>/data a symlink to it.
        project_data_dir = os.path.join(project_path, "data")
        if os.path.islink(project_data_dir):
            os.remove(project_data_dir)
        elif os.path.isdir(project_data_dir):
            shutil.rmtree(project_data_dir)
        os.symlink(data_dir, project_data_dir)

        self.installed_projects.add(project_name)
        self.on_project_installed(self, project,
                                  "".join(self.install_output))

        self.installing = None
        if callback:
            callback(True)

    @gen.engine
    def update_project(self):
        '''Restart the selected project if its repository has updates.'''
        if self.selected_project and \
                (yield gen.Task(self.check_project_has_update,
                                self.selected_project)):
            # restart project
            self.start_selected_project()

    @gen.engine
    def check_project_has_update(self, project_name, callback):
        '''Report through ``callback`` whether the project needs installing.

        ``callback(True)`` when the checkout is missing, ``git fetch``
        fails, or ``FETCH_HEAD`` has commits that ``HEAD`` lacks;
        ``callback(False)`` otherwise, including when the project is
        unknown.
        '''
        if project_name not in self.projects:
            # Always answer the caller: a gen.Task waiting on this
            # callback would otherwise never resume.
            callback(False)
            return

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.install_output = []

        if not os.path.exists(project_path):
            callback(True)
            return

        subprocess.Popen(
            args=["git", "config", "remote.origin.url",
                  project["repository"]],
            cwd=project_path
        ).communicate()

        p = AsyncPopen(
            args=["git", "fetch"],
            cwd=project_path,
            env=self.gitenv
        )
        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            callback(True)
            return

        # Any output means FETCH_HEAD has commits that HEAD lacks.
        output = subprocess.Popen(
            args=["git", "rev-list", "HEAD..FETCH_HEAD"],
            cwd=project_path,
            stdout=subprocess.PIPE
        ).communicate()[0]

        # Truthiness works for both str and bytes output; comparing
        # bytes with "" would always be unequal.
        if output.strip():
            callback(True)
        else:
            callback(False)

    def collect_install_output(self, data):
        '''Echo subprocess output to stdout and keep it for reporting.'''
        sys.stdout.write(data)
        # Drop control characters (tab, LF and CR are kept) before the
        # text is stored.
        data = re.sub("[\x00-\x08\x0b\x0c]", "", data)
        self.install_output.append(data)

    @gen.engine
    def select_project(self, project_name):
        '''Select ``project_name`` and (re)start it when it changed.

        ``"auto"`` defers the choice to the HQ via ``update_warrior_hq``;
        a name that is not in ``self.projects`` is treated as ``None``
        (no project).
        '''
        if project_name == "auto":
            self.update_warrior_hq()
            return

        if project_name not in self.projects:
            project_name = None

        if project_name != self.selected_project:
            # restart
            self.selected_project = project_name
            self.on_project_selected(self, project_name)
            self.start_selected_project()

    def clone_project(self, project_name, project_path):
        '''Clone the checkout into a per-commit directory and return it.

        The directory is ``<data_dir>/projects/<name>-<short hash>``; an
        existing directory is reused without cloning again.
        '''
        # Decode so the hash formats as text (not "b'...'") on Python 3.
        version_string = subprocess.Popen(
            args=["git", "log", "-1", "--pretty=%h"],
            cwd=project_path,
            stdout=subprocess.PIPE
        ).communicate()[0].strip().decode('ascii')

        versioned_root = os.path.join(self.data_dir, "projects")
        project_versioned_path = os.path.join(
            versioned_root, "%s-%s" % (project_name, version_string))
        if not os.path.exists(project_versioned_path):
            if not os.path.exists(versioned_root):
                os.makedirs(versioned_root)

            subprocess.Popen(
                args=["git", "clone", project_path, project_versioned_path],
                env=self.gitenv
            ).communicate()

        return project_versioned_path

    def load_pipeline(self, pipeline_path, context):
        '''Execute a pipeline file and return what it defines.

        The file runs with its own directory as working directory and
        with ``context`` as its namespace; it must define ``project`` and
        ``pipeline``.

        Returns:
            ``(project, pipeline, config_values)`` where ``config_values``
            are the values collected while the file was executing.
        '''
        dirname = os.path.dirname(pipeline_path) or "."

        with open(pipeline_path) as f:
            pipeline_str = f.read()

        ConfigValue.start_collecting()

        # One dict serves as both globals and locals of the executed code.
        namespace = context
        curdir = os.getcwd()
        try:
            os.chdir(dirname)
            # NOTE: this runs the project's pipeline.py with full
            # privileges; only trusted repositories should be configured.
            # The call form of exec is valid on Python 2 and 3.
            exec(pipeline_str, namespace, namespace)
        finally:
            os.chdir(curdir)

        config_values = ConfigValue.stop_collecting()

        return (namespace["project"], namespace["pipeline"], config_values)

    @gen.engine
    def start_selected_project(self):
        '''Install (if needed) and start the selected project's pipeline.

        When the selected project is not in ``self.projects`` the current
        pipeline is cleared instead.  A failed install leaves the
        previous state untouched.
        '''
        project_name = self.selected_project

        if project_name not in self.projects:
            # project_name not in self.projects,
            # stop the current project (if there is one)
            self.runner.set_current_pipeline(None)
            self.fire_status()
            return

        # install or update project if necessary
        if project_name not in self.installed_projects or \
                (yield gen.Task(self.check_project_has_update,
                                project_name)):
            result = yield gen.Task(self.install_project, project_name)
            if not result:
                return

        # remove the configuration variables from the previous project
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        # the path with the project code
        # (this is the most recent code from the repository)
        project_path = os.path.join(self.projects_dir, project_name)

        # clone the project code to a versioned directory
        # where the pipeline is actually run
        project_versioned_path = self.clone_project(project_name,
                                                    project_path)

        # load the pipeline from the versioned directory
        pipeline_path = os.path.join(project_versioned_path, "pipeline.py")
        (project, pipeline, config_values) = self.load_pipeline(
            pipeline_path, {"downloader": self.downloader})

        # add the configuration values to the config manager
        for config_value in config_values:
            self.config_manager.add(config_value)
        project.config_values = config_values

        # start the pipeline
        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.set_current_pipeline(pipeline)

        self.current_project_name = project_name
        self.current_project = project

        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.start()

    def handle_runner_finish(self, runner):
        '''Clean up after the runner stopped; finish a pending shutdown.'''
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        self.current_project_name = None
        self.current_project = None

        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        if self.shut_down_flag or self.reboot_flag:
            ioloop.IOLoop.instance().stop()

            if self.real_shutdown:
                if self.shut_down_flag:
                    os.system("sudo shutdown -h now")
                elif self.reboot_flag:
                    os.system("sudo shutdown -r now")

    def start(self):
        '''Start the updaters, contact the HQ and run the IO loop.'''
        if self.real_shutdown:
            # schedule a reboot
            ioloop.IOLoop.instance().add_timeout(datetime.timedelta(days=7),
                                                 self.max_age_reached)

        self.hq_updater.start()
        self.project_updater.start()
        self.update_warrior_hq()
        ioloop.IOLoop.instance().start()

    def max_age_reached(self):
        '''Timeout handler: reboot after seven days of uptime.'''
        if self.real_shutdown:
            # time for a sanity reboot
            print("Running for more than 7 days. Time to schedule a reboot.")
            self.reboot_gracefully()

            # schedule a forced reboot after two days
            self.schedule_forced_reboot()

    def reboot_gracefully(self):
        '''Reboot once the runner is idle (immediately if it already is).'''
        self.shut_down_flag = False
        self.reboot_flag = True
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the reboot later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                os.system("sudo shutdown -r now")

    def schedule_forced_reboot(self):
        '''Schedule ``forced_reboot`` in two days, at most once.'''
        if self.real_shutdown and not self.forced_reboot_timeout:
            self.forced_reboot_timeout = ioloop.IOLoop.instance().add_timeout(
                datetime.timedelta(days=2), self.forced_reboot)

    def forced_reboot(self):
        '''Reboot right away without waiting for the runner.'''
        if self.real_shutdown:
            os.system("sudo shutdown -r now")

    def stop_gracefully(self):
        '''Shut down once the runner is idle (immediately if it already is).'''
        self.shut_down_flag = True
        self.reboot_flag = False
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the shutdown later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                os.system("sudo shutdown -h now")

    def keep_running(self):
        '''Cancel a pending shutdown/reboot and resume the project.'''
        self.shut_down_flag = False
        self.reboot_flag = False
        self.start_selected_project()
        self.fire_status()

    class Status(object):
        '''Status names reported through ``on_status``.'''
        NO_PROJECT = "NO_PROJECT"
        INVALID_SETTINGS = "INVALID_SETTINGS"
        STOPPING_PROJECT = "STOPPING_PROJECT"
        RESTARTING_PROJECT = "RESTARTING_PROJECT"
        RUNNING_PROJECT = "RUNNING_PROJECT"
        SWITCHING_PROJECT = "SWITCHING_PROJECT"
        STARTING_PROJECT = "STARTING_PROJECT"
        SHUTTING_DOWN = "SHUTTING_DOWN"
        REBOOTING = "REBOOTING"

    def fire_status(self):
        '''Publish the current status through ``on_status``.'''
        self.on_status(self, self.warrior_status())

    def warrior_status(self):
        '''Return the ``Warrior.Status`` value for the current state.

        Checks run in priority order: shutdown, reboot, invalid settings,
        then the selected/current project state.
        '''
        if self.shut_down_flag:
            return Warrior.Status.SHUTTING_DOWN
        elif self.reboot_flag:
            return Warrior.Status.REBOOTING
        elif not self.config_manager.all_valid():
            return Warrior.Status.INVALID_SETTINGS
        elif self.selected_project is None and \
                self.current_project_name is None:
            return Warrior.Status.NO_PROJECT
        elif self.selected_project:
            if self.selected_project == self.current_project_name:
                return Warrior.Status.RUNNING_PROJECT
            else:
                return Warrior.Status.STARTING_PROJECT
        else:
            return Warrior.Status.STOPPING_PROJECT
class Warrior(object):
    '''The warrior god object.

    Owns the configuration values, the list of projects announced by the
    warrior HQ, the installation of project repositories and the
    ``Runner`` that executes the selected project's pipeline.  State
    changes are published through the ``on_*`` events.
    '''

    def __init__(self, projects_dir, data_dir, warrior_hq_url,
                 real_shutdown=False, keep_data=False):
        '''Set up configuration, events and periodic updaters.

        :param projects_dir: writable directory holding the project
            checkouts and ``config.json``.
        :param data_dir: writable directory for item data and the
            versioned project clones.
        :param warrior_hq_url: base URL of the warrior HQ API.
        :param real_shutdown: when true, the machine itself is shut down
            or rebooted, not only the IO loop.
        :param keep_data: passed through to the ``Runner``.
        :raises Exception: if either directory is not writable.
        '''
        if not os.access(projects_dir, os.W_OK):
            raise Exception(
                "Couldn't write to projects directory: %s" % projects_dir)
        if not os.access(data_dir, os.W_OK):
            raise Exception("Couldn't write to data directory: %s" % data_dir)

        self.projects_dir = projects_dir
        self.data_dir = data_dir
        self.warrior_hq_url = warrior_hq_url
        self.real_shutdown = real_shutdown
        self.keep_data = keep_data

        # disable the password prompts
        self.gitenv = dict(os.environ)
        self.gitenv.update({
            'GIT_ASKPASS': '******',
            'SSH_ASKPASS': '******'
        })

        self.warrior_id = StringConfigValue(
            name="warrior_id",
            title="Warrior ID",
            description="The unique number of your warrior instance.",
            editable=False)
        self.selected_project_config_value = StringConfigValue(
            name="selected_project",
            title="Selected project",
            description="The project (to be continued when the warrior "
                        "restarts).",
            default="none",
            editable=False)
        self.downloader = StringConfigValue(
            name="downloader",
            title="Your nickname",
            description="We use your nickname to show your results on our "
                        "tracker. Letters and numbers only.",
            regex="^[-_a-zA-Z0-9]{3,30}$",
            advanced=False)
        self.concurrent_items = NumberConfigValue(
            name="concurrent_items",
            title="Concurrent items",
            description="How many items should the warrior download at a "
                        "time? (Max: 6)",
            min=1,
            max=6,
            default=2)
        self.http_username = StringConfigValue(
            name="http_username",
            title="HTTP username",
            description="Enter a username to protect the web interface, "
                        "or leave empty.",
            default="")
        self.http_password = StringConfigValue(
            name="http_password",
            title="HTTP password",
            description="Enter a password to protect the web interface, "
                        "or leave empty.",
            default="")

        self.config_manager = ConfigManager(
            os.path.join(projects_dir, "config.json"))
        for config_value in (self.warrior_id,
                             self.selected_project_config_value,
                             self.downloader,
                             self.concurrent_items,
                             self.http_username,
                             self.http_password):
            self.config_manager.add(config_value)

        self.bandwidth_monitor = BandwidthMonitor("eth0")
        self.bandwidth_monitor.update()

        self.runner = Runner(concurrent_items=self.concurrent_items,
                             keep_data=self.keep_data)
        self.runner.on_finish += self.handle_runner_finish

        # Name/object of the project whose pipeline is currently loaded,
        # and the name of the project the user (or HQ) asked for.
        self.current_project_name = None
        self.current_project = None
        self.selected_project = None

        # Projects announced by HQ, keyed by name, plus local bookkeeping.
        self.projects = {}
        self.installed_projects = set()
        self.failed_projects = set()

        self.on_projects_loaded = Event()
        self.on_project_installing = Event()
        self.on_project_installed = Event()
        self.on_project_installation_failed = Event()
        self.on_project_refresh = Event()
        self.on_project_selected = Event()
        self.on_status = Event()
        self.on_broadcast_message_received = Event()

        self.http_client = AsyncHTTPClient()

        # Name of the project being installed, or a false value when idle.
        self.installing = False
        self.shut_down_flag = False
        self.reboot_flag = False

        io_loop = ioloop.IOLoop.instance()

        # The periodic callbacks start coroutines; fut.result() re-raises
        # any exception so it reaches the IO loop instead of being lost.
        def update_warrior_callback():
            io_loop.add_future(self.update_warrior_hq(),
                               lambda fut: fut.result())

        def update_project_callback():
            io_loop.add_future(self.update_project(),
                               lambda fut: fut.result())

        self.hq_updater = ioloop.PeriodicCallback(update_warrior_callback,
                                                  10 * 60 * 1000)
        self.project_updater = ioloop.PeriodicCallback(
            update_project_callback, 30 * 60 * 1000)
        self.forced_reboot_timeout = None

        self.lat_lng = None
        self.find_lat_lng()

        self.install_output = None
        self.broadcast_message = None
        self.contacting_hq_failed = False

    def find_lat_lng(self):
        '''Look up the warrior's location (currently disabled, a no-op).'''
        # response = self.http_client.fetch("http://www.maxmind.com/app/mylocation", self.handle_lat_lng, user_agent="")
        pass

    def handle_lat_lng(self, response):
        '''Extract the latitude/longitude from a geolocation response.

        Stores the matched text in ``self.lat_lng``; leaves it untouched
        when the body is empty or does not match.
        '''
        body = response.body
        if not body:
            return
        # The body is decoded before use elsewhere in this class, so it
        # may be bytes; a str pattern cannot be applied to bytes.
        if isinstance(body, bytes):
            body = body.decode('utf-8', 'replace')
        m = re.search(
            r"geoip-demo-results-tbodyLatitude/Longitude</td>"
            r"\s*<td[^>]*>\s*([-/.0-9]+)\s*</td>",
            body)
        if m:
            self.lat_lng = m.group(1)

    def bandwidth_stats(self):
        '''Refresh the bandwidth monitor and return its current stats.'''
        self.bandwidth_monitor.update()
        return self.bandwidth_monitor.current_stats()

    def _hq_api_url(self, endpoint):
        '''Return the HQ URL for the relative API path *endpoint*.

        URLs always use forward slashes, so they are joined by hand
        rather than with the platform-dependent ``os.path.join``.
        '''
        base = self.warrior_hq_url
        if base and not base.endswith("/"):
            base += "/"
        return base + endpoint

    @gen.coroutine
    def update_warrior_hq(self):
        '''Register with HQ if needed, then fetch the project list.

        On a successful update the project list is replaced, the project
        selection is re-evaluated and ``on_projects_loaded`` and
        ``on_broadcast_message_received`` fire.  If HQ reports a newer
        seesaw version a graceful reboot is scheduled instead.
        '''
        # NOTE(review): depending on the Tornado version, fetch() may
        # raise on non-200 responses instead of returning them - confirm
        # that the error branches below are reachable.
        logger.debug('Update warrior hq.')
        headers = {"Content-Type": "application/json"}

        if realize(self.warrior_id) is None:
            user_agent = "ArchiveTeam Warrior/%s" % seesaw.__version__
            body = json.dumps({"warrior": {"version": seesaw.__version__}})
            response = yield self.http_client.fetch(
                self._hq_api_url("api/register.json"),
                method="POST",
                headers=headers,
                user_agent=user_agent,
                body=body)

            if response.code == 200:
                data = json.loads(response.body.decode('utf-8'))
                logger.info("Received Warrior ID '%s'." % data["warrior_id"])
                self.config_manager.set_value("warrior_id",
                                              data["warrior_id"])
                self.fire_status()
            else:
                logger.error("HTTP error %s" % (response.code))
                self.fire_status()
                return
        else:
            logger.debug("Warrior ID '%s'." % realize(self.warrior_id))

        user_agent = "ArchiveTeam Warrior/%s %s" % (seesaw.__version__,
                                                    seesaw.runner_type)
        body = json.dumps({
            "warrior": {
                "warrior_id": realize(self.warrior_id),
                "lat_lng": self.lat_lng,
                "downloader": realize(self.downloader),
                "selected_project":
                    realize(self.selected_project_config_value)
            }
        })
        response = yield self.http_client.fetch(
            self._hq_api_url("api/update.json"),
            method="POST",
            headers=headers,
            user_agent=user_agent,
            body=body)

        if response.code != 200:
            logger.error("HTTP error %s" % (response.code))
            self.contacting_hq_failed = True

            # We don't set projects to {} because it causes the
            # "Stop Current" project button to disappear
            for name in tuple(self.projects):
                if name != self.selected_project:
                    del self.projects[name]

            self.on_projects_loaded(self, self.projects)
            return

        data = json.loads(response.body.decode('utf-8'))

        if StrictVersion(seesaw.__version__) < \
                StrictVersion(data["warrior"]["seesaw_version"]):
            # time for an update
            logger.info("Reboot for Seesaw update.")
            self.reboot_gracefully()

            # schedule a forced reboot after two days
            self.schedule_forced_reboot()
            return

        import calendar

        self.projects = OrderedDict(
            [(project["name"], project) for project in data["projects"]])
        for project_data in self.projects.values():
            if "deadline" in project_data:
                # The trailing "Z" marks the deadline as UTC, so convert
                # with timegm(); mktime() would read it as local time and
                # shift the timestamp by the local UTC offset.
                project_data["deadline_int"] = calendar.timegm(
                    time.strptime(project_data["deadline"],
                                  "%Y-%m-%dT%H:%M:%SZ"))

        previous_project_choice = realize(self.selected_project_config_value)

        if self.selected_project and \
                self.selected_project not in self.projects:
            # the running project is no longer offered by HQ
            yield self.select_project(None)
        elif previous_project_choice in self.projects:
            # select previous project
            yield self.select_project(previous_project_choice)
        elif previous_project_choice == "auto":
            # ArchiveTeam's choice
            if "auto_project" in data:
                yield self.select_project(data["auto_project"])
            else:
                yield self.select_project(None)

        self.contacting_hq_failed = False
        self.on_projects_loaded(self, self.projects)

        self.broadcast_message = data.get('broadcast_message')
        self.on_broadcast_message_received(self,
                                           data.get('broadcast_message'))

    def _fail_install(self, project_name, project, log_message):
        '''Report a failed installation and remember the project as failed.'''
        output = "".join(self.install_output)
        logger.error(log_message, output)
        self.on_project_installation_failed(self, project, output)
        self.failed_projects.add(project_name)

    @gen.coroutine
    def install_project(self, project_name):
        '''Clone or pull the project repository and run its installer.

        Resolves to ``True`` on success and ``False`` when git or the
        project's ``warrior-install.sh`` exits non-zero.  Resolves to
        ``None`` without doing anything when the project is unknown or
        another installation is already in progress.
        '''
        logger.debug('Install project %s', project_name)
        self.installed_projects.discard(project_name)

        if project_name not in self.projects or self.installing:
            return

        self.installing = project_name
        try:
            self.install_output = []

            project = self.projects[project_name]
            project_path = os.path.join(self.projects_dir, project_name)

            self.on_project_installing(self, project)

            # A previously failed install starts over from a fresh clone.
            if project_name in self.failed_projects:
                if os.path.exists(project_path):
                    shutil.rmtree(project_path)
                self.failed_projects.discard(project_name)

            if os.path.exists(project_path):
                # the repository URL may have changed since the clone
                subprocess.Popen(
                    args=["git", "config", "remote.origin.url",
                          project["repository"]],
                    cwd=project_path).communicate()

                logger.debug('git pull from %s', project["repository"])
                p = AsyncPopen2(args=["git", "pull"],
                                cwd=project_path,
                                env=self.gitenv)
            else:
                logger.debug('git clone')
                p = AsyncPopen2(
                    args=["git", "clone", project["repository"],
                          project_path],
                    env=self.gitenv)

            p.on_output += self.collect_install_output
            p.on_end += yield gen.Callback("gitend")
            p.run()
            result = yield gen.Wait("gitend")

            if result != 0:
                self.install_output.append("\ngit returned %d\n" % result)
                self._fail_install(project_name, project,
                                   "Project failed to install: %s")
                raise gen.Return(False)

            logger.debug("git operation: %s", "".join(self.install_output))

            project_install_file = os.path.join(project_path,
                                                "warrior-install.sh")
            if os.path.exists(project_install_file):
                p = AsyncPopen2(args=[project_install_file],
                                cwd=project_path)
                p.on_output += self.collect_install_output
                p.on_end += yield gen.Callback("installend")
                p.run()
                result = yield gen.Wait("installend")

                if result != 0:
                    self.install_output.append(
                        "\nCustom installer returned %d\n" % result)
                    self._fail_install(
                        project_name, project,
                        "Custom installer failed to install: %s")
                    raise gen.Return(False)

            # Start from an empty data directory and expose it inside the
            # project checkout as "data".
            data_dir = os.path.join(self.data_dir, "data")
            if os.path.exists(data_dir):
                shutil.rmtree(data_dir)
            os.makedirs(data_dir)

            project_data_dir = os.path.join(project_path, "data")
            if os.path.islink(project_data_dir):
                os.remove(project_data_dir)
            elif os.path.isdir(project_data_dir):
                shutil.rmtree(project_data_dir)
            os.symlink(data_dir, project_data_dir)

            self.installed_projects.add(project_name)
            logger.debug('Install complete %s',
                         "".join(self.install_output))
            self.on_project_installed(self, project,
                                      "".join(self.install_output))

            raise gen.Return(True)
        finally:
            # Always release the "installing" marker; otherwise an
            # unexpected error would block every later installation.
            self.installing = None

    @gen.coroutine
    def update_project(self):
        '''Reinstall and restart the selected project if it has updates.'''
        logger.debug('Update project.')
        if self.selected_project and \
                (yield self.check_project_has_update(self.selected_project)):
            # restart project
            yield self.start_selected_project(reinstall=True)

    @gen.coroutine
    def check_project_has_update(self, project_name):
        '''Resolve to true when the project checkout is missing or stale.

        Runs ``git fetch`` in the checkout; a failing fetch is reported
        as "has update".  Resolves to ``None`` for unknown projects.
        '''
        logger.debug('Check project has update %s', project_name)
        if project_name not in self.projects:
            return

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.install_output = []

        if not os.path.exists(project_path):
            logger.debug("Project doesn't exist.")
            raise gen.Return(True)

        subprocess.Popen(
            args=["git", "config", "remote.origin.url",
                  project["repository"]],
            cwd=project_path).communicate()

        logger.debug('git fetch')
        p = AsyncPopen2(args=["git", "fetch"],
                        cwd=project_path,
                        env=self.gitenv)
        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            logger.debug('Got return code %s', result)
            raise gen.Return(True)

        # Any commit listed here exists upstream but not locally.
        output = subprocess.Popen(
            args=["git", "rev-list", "HEAD..origin/HEAD"],
            cwd=project_path,
            stdout=subprocess.PIPE).communicate()[0]
        if output.strip():
            logger.debug('True')
            raise gen.Return(True)
        else:
            logger.debug('False')
            raise gen.Return(False)

    def collect_install_output(self, data):
        '''Echo installer output to stdout and keep it for reporting.'''
        if isinstance(data, seesaw.six.binary_type):
            text = data.decode('ascii', 'replace')
        else:
            text = data
        sys.stdout.write(text)
        # strip control characters before the text is stored
        text = re.sub("[\x00-\x08\x0b\x0c]", "", text)
        self.install_output.append(text)

    @gen.coroutine
    def select_project(self, project_name):
        '''Select a project by name and (re)start it if it changed.

        ``"auto"`` defers to HQ via ``update_warrior_hq``; a name that is
        not in ``self.projects`` is treated as "no project".
        '''
        logger.debug('Select project %s', project_name)
        if project_name == "auto":
            yield self.update_warrior_hq()
            return

        if project_name not in self.projects:
            logger.debug("Project doesn't exist.")
            project_name = None

        if project_name != self.selected_project:
            # restart
            self.selected_project = project_name
            self.on_project_selected(self, project_name)
            yield self.start_selected_project()

    def clone_project(self, project_name, project_path):
        '''Clone the checkout into a per-commit directory and return it.

        The directory is ``<data_dir>/projects/<name>-<short hash>``; an
        existing directory is reused.
        '''
        logger.debug('Clone project %s %s', project_name, project_path)
        version_string = subprocess.Popen(
            args=["git", "log", "-1", "--pretty=%h"],
            cwd=project_path,
            stdout=subprocess.PIPE
        ).communicate()[0].strip().decode('ascii')

        logger.debug('Cloning version %s', version_string)

        versions_dir = os.path.join(self.data_dir, "projects")
        project_versioned_path = os.path.join(
            versions_dir, "%s-%s" % (project_name, version_string))
        if not os.path.exists(project_versioned_path):
            if not os.path.exists(versions_dir):
                os.makedirs(versions_dir)

            subprocess.Popen(
                args=["git", "clone", project_path, project_versioned_path],
                env=self.gitenv).communicate()

        return project_versioned_path

    def load_pipeline(self, pipeline_path, context):
        '''Execute a pipeline file and return what it defines.

        The file is executed with *context* as its namespace (the dict
        is modified in place) and with the file's directory as working
        directory.  It must define ``project`` and ``pipeline``.

        :returns: ``(project, pipeline, config_values)`` where
            ``config_values`` is what ``ConfigValue`` collected.
        '''
        logger.debug('Load pipeline %s', pipeline_path)
        dirname, _basename = os.path.split(pipeline_path)
        if dirname == "":
            dirname = "."

        with open(pipeline_path) as f:
            pipeline_str = f.read()

        ConfigValue.start_collecting()

        curdir = os.getcwd()
        try:
            os.chdir(dirname)
            # NOTE(review): this runs the pipeline file's code with full
            # privileges; the repository it comes from must be trusted.
            exec(pipeline_str, context, context)
        finally:
            os.chdir(curdir)

        config_values = ConfigValue.stop_collecting()

        project = context["project"]
        pipeline = context["pipeline"]
        pipeline.project = project
        return (project, pipeline, config_values)

    @gen.coroutine
    def start_selected_project(self, reinstall=False):
        '''Install (if needed), load and start the selected project.

        When the selected project is not in ``self.projects`` the current
        pipeline is stopped instead.

        :param reinstall: force a reinstall even if already installed.
        '''
        logger.debug('Start selected project %s (reinstall=%s)',
                     self.selected_project, reinstall)
        project_name = self.selected_project

        if project_name not in self.projects:
            # project_name not in self.projects,
            # stop the current project (if there is one)
            logger.debug("Project does not exist.")
            self.runner.set_current_pipeline(None)
            self.fire_status()
            return

        # install or update project if necessary
        if project_name not in self.installed_projects or \
                reinstall or \
                (yield self.check_project_has_update(project_name)):
            result = yield self.install_project(project_name)
            if not result:
                logger.warning(
                    "Project %s did not install correctly and "
                    "we're ignoring this problem.", project_name)
                return

        # remove the configuration variables from the previous project
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        # the path with the project code
        # (this is the most recent code from the repository)
        project_path = os.path.join(self.projects_dir, project_name)

        # clone the project code to a versioned directory
        # where the pipeline is actually run
        project_versioned_path = self.clone_project(project_name,
                                                    project_path)

        # load the pipeline from the versioned directory
        pipeline_path = os.path.join(project_versioned_path, "pipeline.py")
        (project, pipeline, config_values) = self.load_pipeline(
            pipeline_path, {"downloader": self.downloader})

        # add the configuration values to the config manager
        for config_value in config_values:
            self.config_manager.add(config_value)
        project.config_values = config_values

        # start the pipeline, unless a shutdown or reboot is pending
        may_run = not self.shut_down_flag and not self.reboot_flag
        if may_run:
            self.runner.set_current_pipeline(pipeline)

        self.current_project_name = project_name
        self.current_project = project

        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        # the event handlers above may have changed the flags
        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.start()

    def handle_runner_finish(self, runner):
        '''Clean up after the runner stops; shut down/reboot if flagged.'''
        logger.info("Runner has finished.")
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        self.current_project_name = None
        self.current_project = None
        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        if self.shut_down_flag or self.reboot_flag:
            ioloop.IOLoop.instance().stop()

            if self.real_shutdown:
                if self.shut_down_flag:
                    system_shutdown()
                elif self.reboot_flag:
                    system_reboot()

    def start(self):
        '''Start the updaters and run the IO loop until it is stopped.'''
        io_loop = ioloop.IOLoop.instance()

        if self.real_shutdown:
            # schedule a reboot
            io_loop.add_timeout(datetime.timedelta(days=7),
                                self.max_age_reached)

        self.hq_updater.start()
        self.project_updater.start()
        io_loop.add_future(self.update_warrior_hq(),
                           lambda fut: fut.result())
        io_loop.start()

    def max_age_reached(self):
        '''Reboot gracefully after seven days of uptime.'''
        if self.real_shutdown:
            # time for an sanity reboot
            logger.info(
                "Running for more than 7 days. Time to schedule a reboot.")
            self.reboot_gracefully()

            # schedule a forced reboot after two days
            self.schedule_forced_reboot()

    def reboot_gracefully(self):
        '''Reboot once the runner is idle (immediately if it already is).'''
        self.shut_down_flag = False
        self.reboot_flag = True
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the reboot later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                system_reboot()

    def schedule_forced_reboot(self):
        '''Schedule a forced reboot in two days, unless one is pending.'''
        if self.real_shutdown and not self.forced_reboot_timeout:
            self.forced_reboot_timeout = \
                ioloop.IOLoop.instance().add_timeout(
                    datetime.timedelta(days=2), self.forced_reboot)

    def forced_reboot(self):
        '''Reboot the machine without waiting for the runner.'''
        logger.info("Stopping immediately...")
        if self.real_shutdown:
            system_reboot()

    def stop_gracefully(self):
        '''Shut down once the runner is idle (immediately if it is).'''
        self.shut_down_flag = True
        self.reboot_flag = False
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the shutdown later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                system_shutdown()

    def forced_stop(self):
        '''Stop the IO loop now and shut down if ``real_shutdown``.'''
        ioloop.IOLoop.instance().stop()
        if self.real_shutdown:
            system_shutdown()

    def keep_running(self):
        '''Cancel a pending shutdown/reboot and resume the project.'''
        self.shut_down_flag = False
        self.reboot_flag = False
        ioloop.IOLoop.instance().add_future(self.start_selected_project(),
                                            lambda fut: fut.result())
        self.fire_status()

    class Status(object):
        '''Status names reported through ``on_status``.'''
        UNINITIALIZED = 'UNINITIALIZED'
        NO_PROJECT = "NO_PROJECT"
        INVALID_SETTINGS = "INVALID_SETTINGS"
        STOPPING_PROJECT = "STOPPING_PROJECT"
        RESTARTING_PROJECT = "RESTARTING_PROJECT"
        RUNNING_PROJECT = "RUNNING_PROJECT"
        SWITCHING_PROJECT = "SWITCHING_PROJECT"
        STARTING_PROJECT = "STARTING_PROJECT"
        SHUTTING_DOWN = "SHUTTING_DOWN"
        REBOOTING = "REBOOTING"

    def fire_status(self):
        '''Publish the current status through ``on_status``.'''
        self.on_status(self, self.warrior_status())

    def warrior_status(self):
        '''Return the ``Warrior.Status`` value describing the warrior.

        Checks are ordered by priority: pending shutdown/reboot, missing
        warrior ID, invalid settings, then the project state.
        '''
        if self.shut_down_flag:
            return Warrior.Status.SHUTTING_DOWN
        if self.reboot_flag:
            return Warrior.Status.REBOOTING
        if realize(self.warrior_id) is None:
            return Warrior.Status.UNINITIALIZED
        if not self.config_manager.all_valid():
            return Warrior.Status.INVALID_SETTINGS
        if self.selected_project is None and \
                self.current_project_name is None:
            return Warrior.Status.NO_PROJECT
        if self.selected_project:
            if self.selected_project == self.current_project_name:
                return Warrior.Status.RUNNING_PROJECT
            return Warrior.Status.STARTING_PROJECT
        return Warrior.Status.STOPPING_PROJECT
class Warrior(object):
    '''Controller for the warrior: configuration, projects and runner.

    Keeps the list of projects announced by the warrior HQ, installs
    project repositories and drives the ``Runner`` that executes the
    selected project's pipeline.  State changes are published through
    the ``on_*`` events.
    '''

    def __init__(self, projects_dir, data_dir, warrior_hq_url,
                 real_shutdown=False, keep_data=False):
        '''Set up configuration, events and periodic updaters.

        :param projects_dir: writable directory holding the project
            checkouts and ``config.json``.
        :param data_dir: writable directory for item data and the
            versioned project clones.
        :param warrior_hq_url: base URL of the warrior HQ API.
        :param real_shutdown: when true, the machine itself is shut down
            or rebooted, not only the IO loop.
        :param keep_data: passed through to the ``Runner``.
        :raises Exception: if either directory is not writable.
        '''
        if not os.access(projects_dir, os.W_OK):
            raise Exception(
                "Couldn't write to projects directory: %s" % projects_dir)
        if not os.access(data_dir, os.W_OK):
            raise Exception("Couldn't write to data directory: %s" % data_dir)

        self.projects_dir = projects_dir
        self.data_dir = data_dir
        self.warrior_hq_url = warrior_hq_url
        self.real_shutdown = real_shutdown
        self.keep_data = keep_data

        # disable the password prompts
        self.gitenv = dict(os.environ)
        self.gitenv.update({
            'GIT_ASKPASS': '******',
            'SSH_ASKPASS': '******'
        })

        self.warrior_id = StringConfigValue(
            name="warrior_id",
            title="Warrior ID",
            description="The unique number of your warrior instance.",
            editable=False)
        self.selected_project_config_value = StringConfigValue(
            name="selected_project",
            title="Selected project",
            description="The project (to be continued when the warrior "
                        "restarts).",
            default="none",
            editable=False)
        self.downloader = StringConfigValue(
            name="downloader",
            title="Your nickname",
            description="We use your nickname to show your results on our "
                        "tracker. Letters and numbers only.",
            regex="^[-_a-zA-Z0-9]{3,30}$",
            advanced=False)
        self.concurrent_items = NumberConfigValue(
            name="concurrent_items",
            title="Concurrent items",
            description="How many items should the warrior download at a "
                        "time? (Max: 6)",
            min=1,
            max=6,
            default=2)
        self.http_username = StringConfigValue(
            name="http_username",
            title="HTTP username",
            description="Enter a username to protect the web interface, "
                        "or leave empty.",
            default="")
        self.http_password = StringConfigValue(
            name="http_password",
            title="HTTP password",
            description="Enter a password to protect the web interface, "
                        "or leave empty.",
            default="")

        self.config_manager = ConfigManager(
            os.path.join(projects_dir, "config.json"))
        for config_value in (self.warrior_id,
                             self.selected_project_config_value,
                             self.downloader,
                             self.concurrent_items,
                             self.http_username,
                             self.http_password):
            self.config_manager.add(config_value)

        self.bandwidth_monitor = BandwidthMonitor("eth0")
        self.bandwidth_monitor.update()

        self.runner = Runner(concurrent_items=self.concurrent_items,
                             keep_data=self.keep_data)
        self.runner.on_finish += self.handle_runner_finish

        # Name/object of the project whose pipeline is currently loaded,
        # and the name of the project the user (or HQ) asked for.
        self.current_project_name = None
        self.current_project = None
        self.selected_project = None

        # Projects announced by HQ, keyed by name, plus local bookkeeping.
        self.projects = {}
        self.installed_projects = set()
        self.failed_projects = set()

        self.on_projects_loaded = Event()
        self.on_project_installing = Event()
        self.on_project_installed = Event()
        self.on_project_installation_failed = Event()
        self.on_project_refresh = Event()
        self.on_project_selected = Event()
        self.on_status = Event()

        self.http_client = AsyncHTTPClient()

        # Name of the project being installed, or a false value when idle.
        self.installing = False
        self.shut_down_flag = False
        self.reboot_flag = False

        self.hq_updater = ioloop.PeriodicCallback(self.update_warrior_hq,
                                                  10 * 60 * 1000)
        self.project_updater = ioloop.PeriodicCallback(self.update_project,
                                                       60 * 60 * 1000)
        self.forced_reboot_timeout = None

        self.lat_lng = None
        self.find_lat_lng()

    def find_lat_lng(self):
        '''Look up the warrior's location (currently disabled, a no-op).'''
        # response = self.http_client.fetch("http://www.maxmind.com/app/mylocation", self.handle_lat_lng, user_agent="")
        pass

    def handle_lat_lng(self, response):
        '''Extract the latitude/longitude from a geolocation response.

        Stores the matched text in ``self.lat_lng``; leaves it untouched
        when the body is empty or does not match.
        '''
        body = response.body
        if not body:
            return
        # A str pattern cannot be applied to a bytes body on Python 3.
        if isinstance(body, bytes):
            body = body.decode('utf-8', 'replace')
        m = re.search(
            r"geoip-demo-results-tbodyLatitude/Longitude</td>"
            r"\s*<td[^>]*>\s*([-/.0-9]+)\s*</td>",
            body)
        if m:
            self.lat_lng = m.group(1)

    def bandwidth_stats(self):
        '''Refresh the bandwidth monitor and return its current stats.'''
        self.bandwidth_monitor.update()
        return self.bandwidth_monitor.current_stats()

    def _hq_api_url(self, endpoint):
        '''Return the HQ URL for the relative API path *endpoint*.

        URLs always use forward slashes, so they are joined by hand
        rather than with the platform-dependent ``os.path.join``.
        '''
        base = self.warrior_hq_url
        if base and not base.endswith("/"):
            base += "/"
        return base + endpoint

    @gen.engine
    def update_warrior_hq(self):
        '''Register with HQ if needed, then fetch the project list.

        On a successful update the project list is replaced, the project
        selection is re-evaluated and ``on_projects_loaded`` fires.  If
        HQ reports a newer seesaw version a graceful reboot is scheduled
        instead.
        '''
        headers = {"Content-Type": "application/json"}

        if realize(self.warrior_id) is None:
            response = yield gen.Task(
                self.http_client.fetch,
                self._hq_api_url("api/register.json"),
                method="POST",
                headers=headers,
                user_agent=("ArchiveTeam Warrior/%s" % seesaw.__version__),
                body=json.dumps({"warrior": {
                    "version": seesaw.__version__
                }}))

            if response.code == 200:
                data = json.loads(response.body)
                print("Received Warrior ID '%s'." % data["warrior_id"])
                self.config_manager.set_value("warrior_id",
                                              data["warrior_id"])
            else:
                print("HTTP error %s" % (response.code))
                return
        else:
            print("Warrior ID '%s'." % realize(self.warrior_id))

        response = yield gen.Task(
            self.http_client.fetch,
            self._hq_api_url("api/update.json"),
            method="POST",
            headers=headers,
            user_agent=("ArchiveTeam Warrior/%s %s" %
                        (seesaw.__version__, seesaw.runner_type)),
            body=json.dumps({
                "warrior": {
                    "warrior_id": realize(self.warrior_id),
                    "lat_lng": self.lat_lng,
                    "downloader": realize(self.downloader),
                    "selected_project":
                        realize(self.selected_project_config_value)
                }
            }))

        if response.code != 200:
            print("HTTP error %s" % (response.code))
            return

        data = json.loads(response.body)

        if StrictVersion(seesaw.__version__) < StrictVersion(
                data["warrior"]["seesaw_version"]):
            # time for an update
            print("Reboot for Seesaw update.")
            self.reboot_gracefully()

            # schedule a forced reboot after two days
            self.schedule_forced_reboot()
            return

        import calendar

        self.projects = OrderedDict(
            [(project["name"], project) for project in data["projects"]])
        for project_data in self.projects.values():
            if "deadline" in project_data:
                # The trailing "Z" marks the deadline as UTC, so convert
                # with timegm(); mktime() would read it as local time and
                # shift the timestamp by the local UTC offset.
                project_data["deadline_int"] = calendar.timegm(
                    time.strptime(project_data["deadline"],
                                  "%Y-%m-%dT%H:%M:%SZ"))

        previous_project_choice = realize(self.selected_project_config_value)

        if self.selected_project and \
                self.selected_project not in self.projects:
            # the running project is no longer offered by HQ
            self.select_project(None)
        elif previous_project_choice in self.projects:
            # select previous project
            self.select_project(previous_project_choice)
        elif previous_project_choice == "auto":
            # ArchiveTeam's choice
            if "auto_project" in data:
                self.select_project(data["auto_project"])
            else:
                self.select_project(None)

        self.on_projects_loaded(self, self.projects)

    def _fail_install(self, project_name, project, callback):
        '''Report a failed installation and hand ``False`` to *callback*.'''
        self.on_project_installation_failed(self, project,
                                            "".join(self.install_output))
        self.installing = None
        self.failed_projects.add(project_name)
        if callback:
            callback(False)

    @gen.engine
    def install_project(self, project_name, callback=None):
        '''Clone or pull the project repository and run its installer.

        *callback*, when given, receives ``True`` on success and
        ``False`` when git or ``warrior-install.sh`` exits non-zero, or
        when nothing was installed because the project is unknown or
        another installation is already in progress.
        '''
        self.installed_projects.discard(project_name)

        if project_name not in self.projects or self.installing:
            # Always answer the caller: a gen.Task waiting on this
            # callback would otherwise never resume.
            if callback:
                callback(False)
            return

        self.installing = project_name
        self.install_output = []

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.on_project_installing(self, project)

        # A previously failed install starts over from a fresh clone.
        if project_name in self.failed_projects:
            if os.path.exists(project_path):
                shutil.rmtree(project_path)
            self.failed_projects.discard(project_name)

        if os.path.exists(project_path):
            # the repository URL may have changed since the clone
            subprocess.Popen(
                args=["git", "config", "remote.origin.url",
                      project["repository"]],
                cwd=project_path).communicate()

            p = AsyncPopen(args=["git", "pull"],
                           cwd=project_path,
                           env=self.gitenv)
        else:
            p = AsyncPopen(
                args=["git", "clone", project["repository"], project_path],
                env=self.gitenv)

        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            self.install_output.append("\ngit returned %d\n" % result)
            self._fail_install(project_name, project, callback)
            return

        project_install_file = os.path.join(project_path,
                                            "warrior-install.sh")
        if os.path.exists(project_install_file):
            p = AsyncPopen(args=[project_install_file], cwd=project_path)
            p.on_output += self.collect_install_output
            p.on_end += yield gen.Callback("installend")
            p.run()
            result = yield gen.Wait("installend")

            if result != 0:
                self.install_output.append(
                    "\nCustom installer returned %d\n" % result)
                self._fail_install(project_name, project, callback)
                return

        # Start from an empty data directory and expose it inside the
        # project checkout as "data".
        data_dir = os.path.join(self.data_dir, "data")
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        project_data_dir = os.path.join(project_path, "data")
        if os.path.islink(project_data_dir):
            os.remove(project_data_dir)
        elif os.path.isdir(project_data_dir):
            shutil.rmtree(project_data_dir)
        os.symlink(data_dir, project_data_dir)

        self.installed_projects.add(project_name)
        self.on_project_installed(self, project,
                                  "".join(self.install_output))

        self.installing = None
        if callback:
            callback(True)

    @gen.engine
    def update_project(self):
        '''Restart the selected project if its repository has updates.'''
        if self.selected_project and (yield gen.Task(
                self.check_project_has_update, self.selected_project)):
            # restart project
            self.start_selected_project()

    @gen.engine
    def check_project_has_update(self, project_name, callback):
        '''Tell *callback* whether the project checkout is missing or stale.

        Runs ``git fetch`` in the checkout; a failing fetch is reported
        as "has update".  Unknown projects are reported as ``False``.
        '''
        if project_name not in self.projects:
            # Always answer the caller: a gen.Task waiting on this
            # callback would otherwise never resume.
            callback(False)
            return

        project = self.projects[project_name]
        project_path = os.path.join(self.projects_dir, project_name)

        self.install_output = []

        if not os.path.exists(project_path):
            callback(True)
            return

        subprocess.Popen(
            args=["git", "config", "remote.origin.url",
                  project["repository"]],
            cwd=project_path).communicate()

        p = AsyncPopen(args=["git", "fetch"],
                       cwd=project_path,
                       env=self.gitenv)
        p.on_output += self.collect_install_output
        p.on_end += yield gen.Callback("gitend")
        p.run()
        result = yield gen.Wait("gitend")

        if result != 0:
            callback(True)
            return

        # Any commit listed here was fetched but is not in HEAD yet.
        output = subprocess.Popen(
            args=["git", "rev-list", "HEAD..FETCH_HEAD"],
            cwd=project_path,
            stdout=subprocess.PIPE).communicate()[0]
        callback(bool(output.strip()))

    def collect_install_output(self, data):
        '''Echo installer output to stdout and keep it for reporting.'''
        sys.stdout.write(data)
        # strip control characters before the text is stored
        data = re.sub("[\x00-\x08\x0b\x0c]", "", data)
        self.install_output.append(data)

    @gen.engine
    def select_project(self, project_name):
        '''Select a project by name and (re)start it if it changed.

        ``"auto"`` defers to HQ via ``update_warrior_hq``; a name that is
        not in ``self.projects`` is treated as "no project".
        '''
        if project_name == "auto":
            self.update_warrior_hq()
            return

        if project_name not in self.projects:
            project_name = None

        if project_name != self.selected_project:
            # restart
            self.selected_project = project_name
            self.on_project_selected(self, project_name)
            self.start_selected_project()

    def clone_project(self, project_name, project_path):
        '''Clone the checkout into a per-commit directory and return it.

        The directory is ``<data_dir>/projects/<name>-<short hash>``; an
        existing directory is reused.
        '''
        version_string = subprocess.Popen(
            args=["git", "log", "-1", "--pretty=%h"],
            cwd=project_path,
            stdout=subprocess.PIPE).communicate()[0].strip()

        versions_dir = os.path.join(self.data_dir, "projects")
        project_versioned_path = os.path.join(
            versions_dir, "%s-%s" % (project_name, version_string))
        if not os.path.exists(project_versioned_path):
            if not os.path.exists(versions_dir):
                os.makedirs(versions_dir)

            subprocess.Popen(
                args=["git", "clone", project_path, project_versioned_path],
                env=self.gitenv).communicate()

        return project_versioned_path

    def load_pipeline(self, pipeline_path, context):
        '''Execute a pipeline file and return what it defines.

        The file is executed with *context* as its namespace (the dict
        is modified in place) and with the file's directory as working
        directory.  It must define ``project`` and ``pipeline``.

        :returns: ``(project, pipeline, config_values)`` where
            ``config_values`` is what ``ConfigValue`` collected.
        '''
        dirname, _basename = os.path.split(pipeline_path)
        if dirname == "":
            dirname = "."

        with open(pipeline_path) as f:
            pipeline_str = f.read()

        ConfigValue.start_collecting()

        curdir = os.getcwd()
        try:
            os.chdir(dirname)
            # NOTE(review): this runs the pipeline file's code with full
            # privileges; the repository it comes from must be trusted.
            exec(pipeline_str, context, context)
        finally:
            os.chdir(curdir)

        config_values = ConfigValue.stop_collecting()

        return (context["project"], context["pipeline"], config_values)

    @gen.engine
    def start_selected_project(self):
        '''Install (if needed), load and start the selected project.

        When the selected project is not in ``self.projects`` the current
        pipeline is stopped instead.
        '''
        project_name = self.selected_project

        if project_name not in self.projects:
            # project_name not in self.projects,
            # stop the current project (if there is one)
            self.runner.set_current_pipeline(None)
            self.fire_status()
            return

        # install or update project if necessary
        if project_name not in self.installed_projects or (yield gen.Task(
                self.check_project_has_update, project_name)):
            result = yield gen.Task(self.install_project, project_name)
            if not result:
                return

        # remove the configuration variables from the previous project
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        # the path with the project code
        # (this is the most recent code from the repository)
        project_path = os.path.join(self.projects_dir, project_name)

        # clone the project code to a versioned directory
        # where the pipeline is actually run
        project_versioned_path = self.clone_project(project_name,
                                                    project_path)

        # load the pipeline from the versioned directory
        pipeline_path = os.path.join(project_versioned_path, "pipeline.py")
        (project, pipeline, config_values) = self.load_pipeline(
            pipeline_path, {"downloader": self.downloader})

        # add the configuration values to the config manager
        for config_value in config_values:
            self.config_manager.add(config_value)
        project.config_values = config_values

        # start the pipeline, unless a shutdown or reboot is pending
        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.set_current_pipeline(pipeline)

        self.current_project_name = project_name
        self.current_project = project

        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        # the event handlers above may have changed the flags
        if not self.shut_down_flag and not self.reboot_flag:
            self.runner.start()

    def handle_runner_finish(self, runner):
        '''Clean up after the runner stops; shut down/reboot if flagged.'''
        if self.current_project:
            for config_value in self.current_project.config_values:
                self.config_manager.remove(config_value.name)

        self.current_project_name = None
        self.current_project = None
        self.on_project_refresh(self, self.current_project, self.runner)
        self.fire_status()

        if self.shut_down_flag or self.reboot_flag:
            ioloop.IOLoop.instance().stop()

            if self.real_shutdown:
                if self.shut_down_flag:
                    os.system("sudo shutdown -h now")
                elif self.reboot_flag:
                    os.system("sudo shutdown -r now")

    def start(self):
        '''Start the updaters and run the IO loop until it is stopped.'''
        if self.real_shutdown:
            # schedule a reboot
            ioloop.IOLoop.instance().add_timeout(datetime.timedelta(days=7),
                                                 self.max_age_reached)

        self.hq_updater.start()
        self.project_updater.start()
        self.update_warrior_hq()
        ioloop.IOLoop.instance().start()

    def max_age_reached(self):
        '''Reboot gracefully after seven days of uptime.'''
        if self.real_shutdown:
            # time for an sanity reboot
            print("Running for more than 7 days. Time to schedule a reboot.")
            self.reboot_gracefully()

            # schedule a forced reboot after two days
            self.schedule_forced_reboot()

    def reboot_gracefully(self):
        '''Reboot once the runner is idle (immediately if it already is).'''
        self.shut_down_flag = False
        self.reboot_flag = True
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the reboot later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                os.system("sudo shutdown -r now")

    def schedule_forced_reboot(self):
        '''Schedule a forced reboot in two days, unless one is pending.'''
        if self.real_shutdown and not self.forced_reboot_timeout:
            self.forced_reboot_timeout = \
                ioloop.IOLoop.instance().add_timeout(
                    datetime.timedelta(days=2), self.forced_reboot)

    def forced_reboot(self):
        '''Reboot the machine without waiting for the runner.'''
        print("Stopping immediately...")
        if self.real_shutdown:
            os.system("sudo shutdown -r now")

    def stop_gracefully(self):
        '''Shut down once the runner is idle (immediately if it is).'''
        self.shut_down_flag = True
        self.reboot_flag = False
        self.fire_status()
        if self.runner.is_active():
            # handle_runner_finish completes the shutdown later
            self.runner.set_current_pipeline(None)
        else:
            ioloop.IOLoop.instance().stop()
            if self.real_shutdown:
                os.system("sudo shutdown -h now")

    def forced_stop(self):
        '''Stop the IO loop now and shut down if ``real_shutdown``.'''
        ioloop.IOLoop.instance().stop()
        if self.real_shutdown:
            os.system("sudo shutdown -h now")

    def keep_running(self):
        '''Cancel a pending shutdown/reboot and resume the project.'''
        self.shut_down_flag = False
        self.reboot_flag = False
        self.start_selected_project()
        self.fire_status()

    class Status(object):
        '''Status names reported through ``on_status``.'''
        NO_PROJECT = "NO_PROJECT"
        INVALID_SETTINGS = "INVALID_SETTINGS"
        STOPPING_PROJECT = "STOPPING_PROJECT"
        RESTARTING_PROJECT = "RESTARTING_PROJECT"
        RUNNING_PROJECT = "RUNNING_PROJECT"
        SWITCHING_PROJECT = "SWITCHING_PROJECT"
        STARTING_PROJECT = "STARTING_PROJECT"
        SHUTTING_DOWN = "SHUTTING_DOWN"
        REBOOTING = "REBOOTING"

    def fire_status(self):
        '''Publish the current status through ``on_status``.'''
        self.on_status(self, self.warrior_status())

    def warrior_status(self):
        '''Return the ``Warrior.Status`` value describing the warrior.

        Checks are ordered by priority: pending shutdown/reboot, invalid
        settings, then the project state.
        '''
        if self.shut_down_flag:
            return Warrior.Status.SHUTTING_DOWN
        if self.reboot_flag:
            return Warrior.Status.REBOOTING
        if not self.config_manager.all_valid():
            return Warrior.Status.INVALID_SETTINGS
        if self.selected_project is None and \
                self.current_project_name is None:
            return Warrior.Status.NO_PROJECT
        if self.selected_project:
            if self.selected_project == self.current_project_name:
                return Warrior.Status.RUNNING_PROJECT
            return Warrior.Status.STARTING_PROJECT
        return Warrior.Status.STOPPING_PROJECT