def get_local_refs():
    """Return the set of enabled ``(ref, tenant_name)`` pairs for this host.

    Looks up the 'gameservers-instances' table for rows matching the
    configured product, group and region. Rows without a truthy 'enabled'
    field are skipped with a warning. An empty set is returned (and an
    error is logged) when the lookup yields nothing.
    """
    ts = get_ts()
    region_name = config.region_name
    product_name = config.product_name
    group_name = config.group_name

    # find the refs that tenants want to run on this group in this region
    criteria = {
        'group_name': group_name,
        'product_name': product_name,
        'region': region_name,
    }
    instances = ts.get_table('gameservers-instances').find(criteria)

    enabled_refs = set()
    if not instances:
        logger.error(
            "No Gameserver instances configured for product '%s' and group '%s' on region '%s'"
            % (product_name, group_name, region_name))
        return enabled_refs

    for row in instances:
        if row.get('enabled', False):
            enabled_refs.add((row['ref'], row['tenant_name']))
        else:
            logger.warning('Task %s, %s is disabled', row['ref'],
                           row['tenant_name'])
    return enabled_refs
def get_manifest(ref):
    """Load the locally synced build manifest for ``ref``.

    The index file is searched for the first entry whose 'ref' equals
    ``ref`` and whose 'target_platform' is "WindowsServer". The manifest
    named by that entry's 'build_manifest' is then read from the local
    ``config/<BUILD_PATH>/`` folder.

    Reading is attempted up to 10 times, one second apart, because the file
    may not be readable yet on the first attempts.

    Returns:
        The parsed manifest, or None when the ref is not in the index or
        the manifest file could not be read after all attempts.
    """
    index_file = get_index()
    candidates = [
        refitem for refitem in index_file["refs"]
        if refitem["ref"] == ref
        and refitem["target_platform"] == "WindowsServer"
    ]
    if not candidates:
        logger.warning("Ref '%s' not found in index file", ref)
        return None

    path = candidates[0]["build_manifest"]
    folder = "config/{repo}/".format(repo=config.BUILD_PATH)
    local_filename = os.path.join(folder, path.split("/")[-1])

    max_attempts = 10
    for attempt in range(1, max_attempts + 1):
        try:
            with open(local_filename, "r") as f:
                return json.load(f)
        except (IOError, OSError, ValueError) as e:
            # IOError/OSError: file missing or unreadable.
            # ValueError: file present but not (yet) valid JSON.
            if attempt < max_attempts:
                logger.info("Cannot get manifest from file. Retrying...")
                time.sleep(1.0)
            else:
                logger.error("Unable to get manifest from file '%s'. %s",
                             local_filename, e)
    # Give up instead of looping forever; callers treat None as "no build".
    return None
def get_region():
    """Return the region name derived from the EC2 availability zone.

    The availability zone is fetched from the ``ec2_metadata`` endpoint with
    a 0.5 second timeout and its last character is dropped. Any failure is
    logged and results in None.
    """
    try:
        response = requests.get(
            ec2_metadata + "placement/" "availability-zone", timeout=0.5)
        zone = response.text.strip()
        # skip the a, b, c at the end
        return zone[:-1]
    except Exception as e:
        logger.error("Cannot find region. %s" % e)
    return None
def get_refined_interpretations(input_string):
    """Classify ``input_string`` as 'POSITIVE' or 'NEGATIVE'.

    The value is converted with ``str()`` and compared case-insensitively
    against a fixed keyword list; any keyword occurring as a substring
    yields 'POSITIVE', otherwise 'NEGATIVE'.

    Returns:
        'POSITIVE', 'NEGATIVE', or None when the value could not be
        converted to a string (the failure is logged).
    """
    keywords = ('Genetic Variant', 'Positive', 'Significant Mutation')
    try:
        # Upper-case once instead of once per keyword.
        haystack = str(input_string).upper()
    except Exception:
        # Log one formatted string; the previous code logged a tuple.
        logger.error(
            'ERROR : Error occurred while preparing data for HBase.\n'
            'ERROR in get_refined_interpretations while running. '
            'ERROR MESSAGE: %s', traceback.format_exc())
        return None

    if any(word.upper() in haystack for word in keywords):
        return 'POSITIVE'
    return 'NEGATIVE'
def _request(self, method, url, data, expect=None, retry=False):
    """Send a JSON request through the session and validate the status code.

    Args:
        method: HTTP method name handed to the session.
        url: Target URL.
        data: Payload; serialized with ``json.dumps(..., indent=4)``.
        expect: Acceptable status codes; defaults to ``[200]``.
        retry: True when this call is the single re-attempt made after
            re-authenticating.

    Returns:
        The response object when its status code is in ``expect``.

    Raises:
        RuntimeError: for an unexpected status code, including an
            "Invalid JTI" response that persists after re-authentication.
    """
    expect = expect or [200]
    payload = json.dumps(data, indent=4)
    r = self._sess.request(method, url, data=payload)
    if r.status_code in expect:
        return r

    if "Invalid JTI" in r.text:
        if retry:
            logger.error(
                "Authorization header '%s' still invalid after reauthentication. Bailing out!",
                self._sess.headers["Authorization"])
        else:
            logger.warning(
                "Authorization header '%s' is invalid. Reauthenticating...",
                self._sess.headers["Authorization"])
            token = get_auth_token(self.tenant, "battledaemon")["jti"]
            self._sess.headers["Authorization"] = "JTI {}".format(token)
            return self._request(method, url, data, expect, True)

    args = (method.upper(), url, r.status_code, r.text)
    raise RuntimeError("Can't %s to %s, err=%s, text=%s" % args)
def sync_index():
    """Download index.json and every referenced build manifest from S3.

    Files are written to the local ``config/<BUILD_PATH>/`` folder:
    ``index.json`` itself plus, for each entry in its "refs" list, the file
    named by the entry's "build_manifest" key (stored under its base name).

    Exits the process with status 2 when the S3 connection cannot be
    established and with status 1 when the index or a manifest is missing
    in the bucket.

    Raises:
        OSError: if the local folder does not exist and cannot be created.
    """
    build_path = config.BUILD_PATH
    bucket_name = config.BUILD_BUCKET
    file_path = "{path}/index.json".format(path=build_path)
    folder = "config/{path}/".format(path=build_path)
    logger.info("Downloading index.json for %s in %s to %s...", file_path,
                bucket_name, folder)
    try:
        conn = connect_to_region(config.S3_REGION_NAME,
                                 calling_format=OrdinaryCallingFormat())
    except Exception as e:
        logger.exception(
            "Fatal error! Could not connect to S3 region '%s': %s",
            config.S3_REGION_NAME, e)
        sys.exit(2)

    bucket = conn.get_bucket(bucket_name)
    key = bucket.get_key(file_path)
    if key is None:
        logger.error("Index file '%s' not found on S3" % file_path)
        sys.exit(1)
    contents = key.get_contents_as_string()

    try:
        os.makedirs(folder)
    except OSError:
        # An already existing folder is fine; anything else (permissions,
        # a file in the way, ...) would only fail later in open().
        if not os.path.isdir(folder):
            raise

    local_filename = os.path.join(folder, "index.json")
    with open(local_filename, "wb") as f:
        f.write(contents)

    index = json.loads(contents)
    for entry in index["refs"]:
        manifest_path = entry["build_manifest"]
        key = bucket.get_key(manifest_path)
        if key is None:
            logger.error("File '%s' not found on S3" % manifest_path)
            sys.exit(1)
        contents = key.get_contents_as_string()
        local_filename = os.path.join(folder, manifest_path.split("/")[-1])
        with open(local_filename, "wb") as f:
            f.write(contents)
def get_machine_details():
    """Collect basic hardware and platform facts about this machine.

    Returns:
        A dict with the keys cpu_count, cpu_count_logical, total_memory_mb,
        machine_name, processor, platform, python_version, system and
        cpu_name. An empty dict is returned when psutil is not importable.

    The CPU name is read from ``wmic cpu get name``. If that command is
    unavailable, fails, or prints nothing usable, cpu_name falls back to
    the value of ``platform.processor()``.
    """
    ret = {}
    try:
        import psutil
    except ImportError:
        logger.error("psutil not available. Cannot get machine info")
        return ret
    import platform

    ret["cpu_count"] = psutil.cpu_count(logical=False)
    ret["cpu_count_logical"] = psutil.cpu_count(logical=True)
    ret["total_memory_mb"] = psutil.virtual_memory().total // 1024 // 1024
    ret["machine_name"] = platform.node()
    ret["processor"] = platform.processor()
    ret["platform"] = platform.platform()
    ret["python_version"] = platform.python_version()
    ret["system"] = platform.system()

    try:
        # universal_newlines gives text with normalized line endings.
        output = subprocess.check_output(["wmic", "cpu", "get", "name"],
                                         universal_newlines=True)
        # First non-empty line is the "Name" header, the second is the CPU.
        lines = [ln.strip() for ln in output.splitlines() if ln.strip()]
        ret["cpu_name"] = lines[1]
    except (OSError, subprocess.CalledProcessError, IndexError,
            ValueError) as e:
        logger.warning("Cannot get CPU name from wmic. %s", e)
        ret["cpu_name"] = ret["processor"]
    return ret
def get_refined_results(input_string):
    """Normalize a result value to a known keyword or a cleaned string.

    The value is converted with ``str()`` and searched case-insensitively
    for the keywords 'Deleterious' and 'positive'. If any keyword occurs,
    that keyword (in its canonical spelling) is returned; when both occur,
    the later one in the list ('positive') wins. Otherwise the input is
    returned with every non-word character (regex ``\\W``) removed.

    Returns:
        The matched keyword, the cleaned string, or None when the value
        cannot be processed (for example a non-string input with no
        keyword match); the failure is logged.
    """
    keywords = ('Deleterious', 'positive')
    try:
        haystack = str(input_string).upper()
        matched = None
        for word in keywords:
            # A later non-matching keyword must not discard an earlier match.
            if word.upper() in haystack:
                matched = word
        if matched is not None:
            return matched
        return re.sub(r'\W', '', input_string)
    except Exception:
        # Log one formatted string; the previous code logged a tuple.
        logger.error(
            'ERROR : Error occurred while preparing data for HBase.\n'
            'ERROR in get_refined_results while running. '
            'ERROR MESSAGE: %s', traceback.format_exc())
        return None
def download_latest_builds(force=False):
    """Download and install the builds for every ref enabled on this host.

    For each ``(ref, tenant)`` returned by ``get_local_refs()`` the build
    manifest is looked up; refs without a manifest are skipped. A build
    that ``is_build_installed`` reports as present is skipped unless
    ``force`` is true. Otherwise the archive is downloaded and installed,
    with a ``log_event`` emitted before the download, after the download
    and after the install.

    Args:
        force: When true, re-download and re-install builds even if they
            are already present locally.

    Exits the process with status 1 when the 'ue4-build-artifacts' table
    has no row for the configured product.
    """
    ts = get_ts()
    product_name = config.product_name

    # The artifact row is only used as a sanity check that the product is
    # configured; its contents are not needed here.
    rows = ts.get_table('ue4-build-artifacts').find(
        {'product_name': product_name})
    if not rows:
        logger.error("No UE4 build artifacts configured for product '%s'" %
                     product_name)
        sys.exit(1)

    refs = get_local_refs()
    if refs:
        logger.info('Syncing builds for the following refs: %s' % repr(refs))

    for ref, tenant in refs:
        build_info = get_manifest(ref)
        if build_info is None:
            logger.info("Build %s not found. Ignoring ref.", ref)
            continue

        build_name = build_info["build"]
        # Parenthesized single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Checking out build '%s'" % build_name)
        if not force and is_build_installed(build_name,
                                            build_info["executable_path"]):
            logger.info("Build '%s' already installed" % build_name)
            continue

        log_details = {"archive": build_info["archive"]}
        log_event("download_build",
                  "Downloading build for ref '%s'" % ref,
                  details=log_details,
                  tenant_name=tenant)
        local_filename = download_build(build_info["archive"],
                                        ignore_if_exists=(not force))
        log_details["local_filename"] = local_filename
        log_event("download_build_complete",
                  "Finished downloading build for ref '%s'" % ref,
                  details=log_details,
                  tenant_name=tenant)
        logger.info("Done downloading '%s' to %s" %
                    (build_info["archive"], local_filename))

        install_build(local_filename)
        log_event("install_build_complete",
                  "Finished installing build for ref '%s'" % ref,
                  details=log_details,
                  tenant_name=tenant)
def run(self):
    """Supervise the battleserver processes for ``self.ref``.

    After checking that the build's executable exists on disk, loops
    forever and on each pass:

    1. Re-reads the configured process count via ``get_num_processes`` and
       terminates surplus servers (preferring ones whose resource status
       is not "running") or starts missing ones.
    2. Drops the first tracked pid that no longer exists, so that a
       replacement is started on a later pass.
    3. Compares the build manifest for the ref in a freshly fetched index
       with the one captured at startup and calls
       ``shutdown_servers_and_exit`` when they differ.
    4. Drains the per-server output queues, reacting to the
       "Game Engine Initialized." and "ProcessExit" lines.
    5. Applies startup and heartbeat timeouts and handles pending commands
       reported by the server resource.

    KeyboardInterrupt and any other ``Exception`` end the loop through
    ``shutdown_servers_and_exit``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed listing; confirm against the original file.
    """
    try:
        build_info = get_manifest(self.ref)
        build_path = build_info["build"]
        index_file = get_index()
        # NOTE(review): ``config_file`` is not defined anywhere in this
        # function; presumably a module-level name -- verify. The locals
        # ``command_line`` and ``build_path`` are never read below.
        command_line = config_file["command-line"]
        status = "starting"
        build_path = build_info["build"]
        executable = os.path.join(config.BSD_BATTLESERVER_FOLDER,
                                  build_info["build"],
                                  build_info["executable_path"])
        if not os.path.exists(executable):
            log_event(
                "build_not_installed",
                "Build '%s' not installed. Cannot start daemon." %
                build_info["build"])
            return

        # ``diff`` below is measured from this single timestamp, i.e. from
        # the start of run(), not from the start of each individual server.
        start_time = time.time()
        loop_cnt = 0

        # read line without blocking
        while 1:
            loop_cnt += 1
            diff = (time.time() - start_time)
            p = None
            config_num_processes = get_num_processes(self.ref, self.tenant)
            if config_num_processes != self.num_processes:
                txt = "Number of processes in config for ref '%s' has changed from %s to %s" % (
                    self.ref, self.num_processes, config_num_processes)
                logger.warning(txt)
                log_event("num_processes_changed", txt)
                # if we should run more processes: no problem, we'll add them in automatically
                # but if we should run fewer processes we need to kill some
                self.num_processes = config_num_processes
                if len(self.battleserver_instances) > self.num_processes:
                    servers_killed = []
                    while len(self.battleserver_instances
                              ) > self.num_processes:
                        logger.info(
                            "I am running %s battleservers but should be running %s. Killing servers..."
                            % (len(self.battleserver_instances),
                               self.num_processes))
                        # try to find a server that is not 'running'. If no such servers are found then kill a running one
                        for pid, (q, battleserver_resource, status
                                  ) in self.battleserver_instances.items():
                            resource_status = battleserver_resource.get_status(
                            )
                            if resource_status != "running":
                                logger.info(
                                    "Found battleserver in state '%s' to kill: %s"
                                    % (resource_status,
                                       battleserver_resource))
                                pid_to_kill = pid
                                break
                        else:
                            # for/else: reached only when no non-running
                            # server was found (the loop never hit break).
                            logger.warning(
                                "Found no battleserver to kill that was not 'running'. I will kill a running one"
                            )
                            # Indexing keys() relies on Python 2 returning a list.
                            pid_to_kill = self.battleserver_instances.keys(
                            )[0]
                        try:
                            p = psutil.Process(pid_to_kill)
                            q, battleserver_resource, status = self.battleserver_instances[
                                pid_to_kill]
                            logger.info("Killing server with pid %s" %
                                        pid_to_kill)
                            p.terminate()
                            servers_killed.append(str(pid_to_kill))
                            battleserver_resource.set_status(
                                "killed", {"status-reason": "Scaling down"})
                        except psutil.NoSuchProcess:
                            # NOTE(review): no argument is supplied for the
                            # %s placeholder, so the literal "%s" is logged.
                            logger.info(
                                "Cannot kill %s because it's already dead")
                        del self.battleserver_instances[pid_to_kill]
                        time.sleep(5.0)
                    txt = "Done killing servers for ref '%s'. Killed servers %s and am now running %s servers" % (
                        self.ref, ", ".join(servers_killed),
                        len(self.battleserver_instances))
                    log_event("servers_killed", txt)

            if self.num_processes == 0:
                logger.info("Running zero processes")
                time.sleep(10)
                continue

            if len(self.battleserver_instances) < self.num_processes:
                num_added = 0
                while len(
                        self.battleserver_instances) < self.num_processes:
                    logger.info(
                        "I am running %s battleservers but should be running %s. Adding servers..."
                        % (len(self.battleserver_instances),
                           self.num_processes))
                    pid, q, battleserver_resource = self.start_battleserver(
                    )
                    # Each entry is (output queue, server resource, local status).
                    self.battleserver_instances[pid] = (
                        q, battleserver_resource, "starting")
                    num_added += 1
                    time.sleep(5.0)
                logger.info(
                    "Done adding servers. Running instances: %s" %
                    ",".join([
                        str(p) for p in self.battleserver_instances.keys()
                    ]))
                txt = "Done adding servers for ref '%s'. Added %s servers and am now running %s servers" % (
                    self.ref, num_added, len(self.battleserver_instances))
                log_event("servers_added", txt)

            # Detect processes that have disappeared. At most one entry is
            # removed per pass: the loop breaks right after the delete, so
            # the dict is not iterated again after being modified.
            for pid, (q, battleserver_resource,
                      status) in self.battleserver_instances.iteritems():
                try:
                    p = psutil.Process(pid)
                except psutil.NoSuchProcess:
                    logger.info("Process %s running server '%s' has died",
                                pid, battleserver_resource)
                    resource_status = battleserver_resource.get_status()
                    if resource_status == "starting":
                        battleserver_resource.set_status(
                            "abnormalexit",
                            {"status-reason": "Failed to start"})
                    if resource_status == "running":
                        battleserver_resource.set_status(
                            "abnormalexit",
                            {"status-reason": "Died prematurely"})
                    # else the instance has updated the status
                    time.sleep(5.0)
                    logger.info("Restarting UE4 Server (1)...")
                    del self.battleserver_instances[pid]
                    break

            # Compare the manifest for this ref in the current index with
            # the one from the index captured before the loop started.
            new_index_file = get_index()
            old_manifest = find_build_manifest(index_file, self.ref)
            new_manifest = find_build_manifest(new_index_file, self.ref)
            if old_manifest != new_manifest:
                build_info = get_manifest(self.ref)
                build_path = build_info["build"]
                logger.info("Index file has changed. Reloading")
                # Presumably does not return (name suggests it exits) -- verify.
                self.shutdown_servers_and_exit("New build is available")

            # Drain server output. The loop repeats while at least one
            # queue produced a line during the pass and stops once a pass
            # yields nothing, a server reports "ProcessExit", or no
            # instances remain.
            while 1:
                if not self.battleserver_instances:
                    break
                empty = True
                for pid, (
                        q, battleserver_resource,
                        status) in self.battleserver_instances.iteritems():
                    try:
                        line = q.get(timeout=.1)
                    except Empty:
                        #sys.stdout.write(".")
                        print "%s..." % pid
                        time.sleep(1.0)
                    else:
                        # got line
                        empty = False
                        logger.debug("stdout: %s", line)
                        if "Game Engine Initialized." in line:
                            logger.info("Game server has started up!")
                            status = "started"
                            self.battleserver_instances[pid] = (
                                q, battleserver_resource, status)
                        if line == "ProcessExit":
                            logger.info("UE4 Process has exited")
                            resource_status = battleserver_resource.get_status(
                            )
                            if resource_status == "starting":
                                battleserver_resource.set_status(
                                    "abnormalexit",
                                    {"status-reason": "Failed to start"})
                            # else the instance has updated the status
                            time.sleep(5.0)
                            logger.info("Restarting UE4 Server (2)...")
                            # Best-effort terminate; the bare except also
                            # hides errors other than a missing process.
                            try:
                                p = psutil.Process(pid)
                                if p:
                                    p.terminate()
                            except:
                                pass
                            del self.battleserver_instances[pid]
                            # Force the drain loop to end after this removal.
                            empty = True
                            break
                if empty:
                    time.sleep(1.0)
                    break

            # Timeout and command handling. items() is relied on to return
            # a list copy (Python 2), which makes the deletes below safe.
            for pid, (q, battleserver_resource,
                      status) in self.battleserver_instances.items():
                if status == "starting" and diff > 60.0:
                    # NOTE(review): ``diff`` counts from the start of run(),
                    # so a server added later is judged against the same
                    # clock. sys.exit raises SystemExit, which neither
                    # except clause below catches, so
                    # shutdown_servers_and_exit is not called on this path.
                    logger.error(
                        "Server still hasn't started after %.0f seconds!" %
                        diff)
                    sys.exit(-1)
                elif status == "started" and loop_cnt % 10 == 0:
                    # Poll the server resource only on every 10th pass.
                    resp = battleserver_resource.get().json()
                    if len(resp["pending_commands"]) > 0:
                        for cmd in resp["pending_commands"]:
                            logger.warning(
                                "I should execute the following command: '%s'",
                                cmd["command"])
                            # Shallow copy of the resource, re-pointed at the
                            # command's URL, used to mark the command "running".
                            command_resource = copy.copy(
                                battleserver_resource)
                            command_resource.location = cmd["url"]
                            command_resource.patch(
                                data={"status": "running"})
                            if cmd["command"] == "kill":
                                logger.error(
                                    "External command to kill servers!")
                                self.shutdown_servers_and_exit(
                                    "Received command to kill all")
                    resource_status = resp["status"]
                    if diff > 60.0 and resource_status == "starting":
                        logger.error(
                            "Server is still in status '%s' after %.0f seconds!"
                            % (resource_status, diff))
                        battleserver_resource.set_status(
                            "killed", {
                                "status-reason":
                                "Failed to reach 'started' status"
                            })
                        time.sleep(5.0)
                        logger.info("Restarting UE4 Server (4)...")
                        try:
                            p = psutil.Process(pid)
                            if p:
                                p.terminate()
                        except:
                            pass
                        del self.battleserver_instances[pid]
                    else:
                        # The parsed heartbeat has its tzinfo dropped and is
                        # compared with naive utcnow(); assumes the server
                        # reports heartbeat_date in UTC -- TODO confirm.
                        heartbeat_date = dateutil.parser.parse(
                            resp["heartbeat_date"]).replace(tzinfo=None)
                        heartbeat_diff = (datetime.datetime.utcnow() -
                                          heartbeat_date).total_seconds()
                        if heartbeat_diff > 60:
                            logger.error(
                                "Server heartbeat is %s seconds old. The process must be frozen",
                                heartbeat_diff)
                            battleserver_resource.set_status(
                                "killed",
                                {"status-reason": "Heartbeat timeout"})
                            time.sleep(5.0)
                            logger.info("Restarting UE4 Server (5)...")
                            try:
                                p = psutil.Process(pid)
                                if p:
                                    p.terminate()
                            except:
                                pass
                            del self.battleserver_instances[pid]
    except KeyboardInterrupt:
        logger.info("User exiting...")
        self.shutdown_servers_and_exit("User exit")
    except Exception as e:
        # unhandled exception
        logger.exception(
            "Fatal error occurred in run_battleserver_loop. Exiting")
        self.shutdown_servers_and_exit(
            "Fatal error, '%s' occurred in run_battleserver_loop" % e)