def flaskify(self, function, cache=True):
    """
    Wrap ``function`` as an endpoint that returns a JSON response.

    The wrapper forwards its positional/keyword arguments, together with the
    query-string arguments of the current request, to ``self.transform`` and
    wraps the result in a ``Response`` with status 200 and the
    ``application/json`` mimetype.

    :param function: callable to expose; its ``__name__`` is copied onto the
        generated wrapper
    :param cache: when true, the body is looked up in ``self.cache`` keyed by
        the request URL and only computed on a miss
    :return: the generated wrapper function
    """
    if not cache:
        def generated_function(*args, **kwargs):
            # Query-string values are merged into kwargs and also passed on
            # as keyword arguments to transform.
            kwargs.update(request.args.to_dict())
            payload = self.transform(function, args, kwargs,
                                     **request.args.to_dict())
            return Response(response=payload,
                            status=200,
                            mimetype="application/json")

        generated_function.__name__ = function.__name__
        return generated_function

    def generated_function(*args, **kwargs):
        def build_body():
            query_args = request.args.to_dict()
            return self.transform(function, args, kwargs, **query_args)

        cache_key = str(request.url)
        body = self.cache.get(key=cache_key, createfunc=build_body)
        return Response(response=body,
                        status=200,
                        mimetype="application/json")

    generated_function.__name__ = function.__name__
    logger.info(generated_function.__name__)
    return generated_function
def exit():
    """
    Shut down everything that was started before the interpreter exits.

    Kills the recorded worker pids, terminates the spawned worker processes,
    halts the server master when one exists, stops the app and housekeeper
    updates, finalizes the config and shuts the multiprocessing manager down.
    """
    for pid in worker_pids:
        # Handle every pid on its own: one process that is already gone must
        # not stop the remaining workers from being killed.
        try:
            os.kill(pid, 9)
        except OSError:
            logger.info("Worker process {} already killed".format(pid))

    for process in worker_processes:
        logger.info("Shutting down worker process with pid: {} ...".format(
            process.pid))
        process.terminate()

    if master is not None:
        master.halt()

    logger.info("Shutting down app updates...")
    app.shutdown_updates()
    logger.info("Finalizing config...")
    app.finalize_config()

    logger.info("Shutting down housekeeper updates...")
    if housekeeper is not None:
        housekeeper.shutdown_updates()

    # wait for the spawner and the worker threads to go down
    if manager is not None:
        manager.shutdown()
        # make sure the manager process itself is gone as well
        manager._process.terminate()
def update(self):
    """
    Makes sure the storage_folder contains updated versions of all the repos

    Holds the ``update_lock`` lock file inside ``self.repo_folder`` for the
    whole run, marks this instance as the updater and, for every URL in
    ``self._repo_urls``, enters the repo context and rebuilds the cached
    lines-changed data.
    """
    lock = LockFile(os.path.join(self.repo_folder, 'update_lock'))
    with lock:
        self.is_updater = True
        try:
            for repo_url in self._repo_urls:
                with self.get_repo(repo_url) as repo:
                    logger.info('Git: Calculating metrics for %s', repo.url)
                    # Do slow functions and rebuild their caches
                    self.lines_changed_minus_whitespace(repo.url,
                                                        rebuild_cache=True)
        finally:
            # Reset the flag even when a repo fails, otherwise the instance
            # would keep claiming to be the updater.
            self.is_updater = False
def exit():
    """
    Shut down everything that was started, then kill the main process.

    Kills the recorded worker pids, terminates the spawned worker processes,
    halts the server master when one exists, stops the housekeeper updates,
    shuts the multiprocessing manager down and finally sends SIGKILL to the
    current process.
    """
    for pid in worker_pids:
        # Handle every pid on its own: one process that is already gone must
        # not stop the remaining workers from being killed.
        try:
            os.kill(pid, 9)
        except OSError:
            logger.info("Worker process {} already killed".format(pid))

    for process in worker_processes:
        logger.info("Shutting down worker process with pid: {} ...".format(
            process.pid))
        process.terminate()

    if master is not None:
        master.halt()

    logger.info("Shutting down housekeeper updates...")
    if housekeeper is not None:
        housekeeper.shutdown_updates()

    # wait for the spawner and the worker threads to go down
    if manager is not None:
        manager.shutdown()
        # make sure the manager process itself is gone as well
        manager._process.terminate()

    # Prevent multiprocessing's atexit from conflicting with gunicorn
    logger.info("Killing main augur process with PID: {}".format(
        os.getpid()))
    os.kill(os.getpid(), 9)
    # Fallback in case the signal above does not end the process.
    os._exit(0)
def worker_start(worker_name=None, instance_number=0, worker_port=None):
    """
    Launch a worker's start command in a shell, logging to its log file.

    :param worker_name: name of the worker; used for the ``workers/<name>``
        directory and the ``<name>_start`` command
    :param instance_number: index of this instance; the launch is delayed by
        ``120 * instance_number`` seconds
    :param worker_port: used only to build the log file name
        ``workers/<name>/worker_<port>.log``

    If the log file cannot be opened the error is logged and the worker's
    output is discarded instead.
    """
    # Stagger the instances so they do not all start at once.
    time.sleep(120 * instance_number)

    log_file = None
    try:
        log_file = open(
            "workers/{}/worker_{}.log".format(worker_name, worker_port), "a+")
    except IOError as e:
        logger.error(
            "Error opening log file for auto-started worker {}: {}".format(
                worker_name, e))

    destination = subprocess.DEVNULL if log_file is None else log_file
    try:
        # The command string must stay exactly as is: the server looks up
        # previously started workers by matching this command line.
        subprocess.Popen("cd workers/{} && {}_start".format(
            worker_name, worker_name),
                         shell=True,
                         stdout=destination,
                         stderr=subprocess.STDOUT)
    finally:
        # The child has its own copy of the descriptor; release ours.
        if log_file is not None:
            log_file.close()
    logger.info("{} booted.".format(worker_name))
def __enter__(self):
    """
    Update context

    Acquires ``self.lock`` without waiting, then brings the working copy at
    ``self.path`` up to date: clones it when the path does not exist,
    otherwise pulls, and falls back to deleting the directory and cloning
    again when the pull fails.

    :return: ``self``
    :raises: whatever ``self.lock.acquire`` raises when the lock cannot be
        taken, and any error from the final clone attempt (the lock is broken
        before such an error is re-raised)
    """
    self.lock.acquire(timeout=0)
    try:
        logger.info('Git: Updating %s', self.url)
        if not os.path.exists(self.path):
            logger.debug('Cloning %s', self.url)
            git.Git(self.containing_folder).clone(self.url, self.path)
        else:
            try:
                repo = self.git(is_updater=True)
                logger.debug('Pulling %s', self.url)
                repo.git.pull()
            except Exception as e:
                # Best effort: any failure to pull is answered with a fresh
                # clone of the repository.
                logger.debug('Re-Cloning %s because %s', self.url, str(e))
                shutil.rmtree(self.path)
                git.Git(self.containing_folder).clone(self.url, self.path)
    except BaseException:
        # __exit__ is not called when __enter__ raises, so the lock has to be
        # given up here or it would stay held.
        self.lock.break_lock()
        raise
    return self
def run():
    """
    Runs app, halts app if exceptions/conflicts are found

    Parses the ``-u/--updater`` flag: without it the server is started with
    the host, port and worker count read from the config; with it only the
    update threads are joined.
    """
    mp.set_start_method('forkserver')
    app = augur.Application()
    app.arg_parser.add_argument(
        "-u",
        "--updater",
        action="store_true",
        help="Do not start the Gunicorn server, only run update threads.")
    args, _unknown = app.arg_parser.parse_known_args()
    logger.info('Loading...')
    # app.init_all()
    app.finalize_config()
    app.schedule_updates()
    master = None

    def shutdown():
        if master is not None:
            master.halt()
        app.shutdown_updates()
        # Prevent multiprocessing's atexit from conflicting with gunicorn
        os._exit(0)

    atexit.register(shutdown)

    if args.updater:
        logger.info('Running in update mode...')
        try:
            app.join_updates()
        except KeyboardInterrupt:
            shutdown()
        return

    host = app.read_config('Server', 'host', 'AUGUR_HOST', '0.0.0.0')
    port = app.read_config('Server', 'port', 'AUGUR_PORT', '5000')
    worker_count = app.read_config('Server', 'workers', 'AUGUR_WORKERS',
                                   mp.cpu_count())
    options = {
        'bind': '%s:%s' % (host, port),
        'workers': int(worker_count),
        'accesslog': '-',
        'access_log_format': '%(h)s - %(t)s - %(r)s',
    }
    logger.info('Starting server...')
    master = Arbiter(AugurGunicornApp(options)).run()
def __exit__(self, type, value, traceback):
    """
    Leave the update context.

    Stores the current UTC time under ``self.data['last_updated']``, saves,
    logs completion and breaks ``self.lock``. The exception arguments are not
    inspected and nothing is returned, so an exception raised inside the
    ``with`` body keeps propagating.
    """
    try:
        # Save the updated time
        self.data['last_updated'] = str(datetime.datetime.utcnow())
        self.save()
        logger.info('Git: Update completed for %s', self.url)
    finally:
        # Give the lock up even if saving fails, otherwise later updates
        # could never acquire it.
        self.lock.break_lock()
def lines_changed_minus_whitespace(self, repo_url, from_commit=None, df=None, rebuild_cache=True):
    """
    Return per-commit metadata and line-change counts for a repository.

    The data comes from ``git log -p -w`` and is cached under the key
    ``'lc-<repo_url>'``.

    :param repo_url: URL identifying the repo; passed to ``self.get_repo``
        and used in the cache key
    :param from_commit: NOTE(review): shadowed by a local of the same name in
        ``heavy_lifting``, so a value passed here is never read
    :param df: previously computed frame; when given, the log is restricted
        to commits after its last ``hash`` and new rows are appended to it
    :param rebuild_cache: when true, the cached value is dropped and
        recomputed on top of the previously cached frame
    :return: the frame produced by ``heavy_lifting`` (via the cache)

    NOTE(review): the nesting of the parsing loop was reconstructed from a
    whitespace-collapsed source; confirm it against version control.
    """
    def heavy_lifting():
        nonlocal df
        # This local hides the outer ``from_commit`` parameter.
        from_commit = None
        repo = self.get_repo(repo_url)
        git_repo = repo.git()
        frames = []
        if df is not None:
            # Continue from the last commit already present in ``df``.
            frames.append(df)
            from_commit = df['hash'].iloc[-1]
        """ Run a Git log command that returns each entry into 3 parts: 1. JSON of the metadata 2. Commit message 3. Diffs """
        arg_array = ['-p', '-w', '-m', '--full-history', '--reverse', """--pretty=format:'[START ENTRY]%n{%n"hash":"%h",%n"author_name":"%an",%n"author_email":"%ae",%n"author_date":"%ai",%n"committer_name": "%cn",%n"committer_email":"%ce",%n"commit_date":"%ci",%n"parents":"%p"%n}%n#####SPLIT#####%s#####SPLIT#####'"""]
        if from_commit is not None:
            arg_array.append('{}..'.format(from_commit))
        history = git_repo.git.log(*arg_array)
        # Split the message into individual entries
        entries = history.split('[START ENTRY]')[1:]
        for entry in entries:
            # splits[0] is the JSON metadata, splits[1] the commit message,
            # splits[2] the diffs.
            splits = entry.split('#####SPLIT#####')
            try:
                data = json.loads(splits[0])
            except json.JSONDecodeError as err:
                # Entries whose metadata is not valid JSON are skipped.
                continue
            data['message'] = splits[1]
            if (len(splits[2]) > 2):
                diffs = splits[2].split('diff --git')
                for diff in diffs[1:]:
                    if '+' in diff:
                        file_search = re.search('b(\/.+)', diff)
                        if file_search is not None:
                            # NOTE(review): ``filename`` is assigned but not used.
                            filename = file_search.group(1)
                            # Find all the lines that begin with a plus or minus to count added
                            # Minus one to account the file matches
                            additions = len(re.findall('\n\+[ \t]*[^\s]', diff)) - 1
                            deletions = len(re.findall('\n-[ \t]*[^\s]', diff)) - 1
                            # Each matching file diff overwrites these keys, so
                            # only the last one is kept for the entry.
                            data['additions'] = additions
                            data['deletions'] = deletions
            frames.append(pd.DataFrame(data, index=['hash']))
        if len(frames):
            df = pd.concat(frames)
            df['author_affiliation'] = self._csv.classify_emails(df['author_email'])
            df['committer_affiliation'] = self._csv.classify_emails(df['committer_email'])
        # NOTE(review): ``df`` is still None here when no frames were collected.
        return df
    results = self.__cache.get(key='lc-{}'.format(repo_url), createfunc=heavy_lifting)
    if rebuild_cache:
        # Drop the cached value and recompute, seeding the run with the old
        # frame so only newer commits are read from the log.
        self.__cache.remove_value(key='lc-{}'.format(repo_url))
        new_results = self.lines_changed_minus_whitespace(repo_url, df=results, rebuild_cache=False)
        if len(new_results) > len(results):
            logger.info('Git: Added commits from %s to %s', results['hash'].iloc[-1], new_results['hash'].iloc[-1])
            results = new_results
    return results
def cli(app, enable_housekeeper):
    """
    Boot the broker, the optional housekeeper and workers, then the server.

    :param app: application object; provides ``schedule_updates``,
        ``read_config``, ``shutdown_updates`` and ``finalize_config``
    :param enable_housekeeper: when true, previously started workers are
        killed, the configured workers are booted and a ``Housekeeper`` is
        created; the function returns early if no ``Workers`` config exists

    An ``atexit`` hook is registered that tears everything down and then
    kills the current process.
    """
    def get_process_id(name):
        """Return process ids found by name or command
        """
        child = subprocess.Popen(['pgrep', '-f', name],
                                 stdout=subprocess.PIPE,
                                 shell=False)
        response = child.communicate()[0]
        return [int(pid) for pid in response.split()]

    def kill_quietly(pid):
        """Send SIGKILL to ``pid``; log instead of raising when that fails."""
        try:
            os.kill(pid, 9)
        except OSError:
            logger.info("Worker process {} already killed".format(pid))

    mp.set_start_method('forkserver', force=True)
    app.schedule_updates()
    master = None
    housekeeper = None

    logger.info("Booting broker and its manager...")
    manager = mp.Manager()
    broker = manager.dict()

    controller = app.read_config('Workers')
    worker_pids = []
    worker_processes = []

    if enable_housekeeper:
        if not controller:
            return
        for worker in controller.keys():
            if not controller[worker]['switch']:
                continue
            logger.info(
                "Your config has the option set to automatically boot {} instances of the {}"
                .format(controller[worker]['workers'], worker))
            pids = get_process_id(
                "/bin/sh -c cd workers/{} && {}_start".format(worker, worker))
            worker_pids += pids
            if len(pids) > 0:
                # presumably pid + 1 is the process started by the matched
                # shell -- TODO confirm
                worker_pids.append(pids[0] + 1)
                pids.append(pids[0] + 1)
                logger.info(
                    "Found and preparing to kill previous {} worker pids: {}".
                    format(worker, pids))
                for pid in pids:
                    kill_quietly(pid)

            for i in range(controller[worker]['workers']):
                logger.info("Booting {} #{}".format(worker, i + 1))
                worker_process = mp.Process(target=worker_start,
                                            kwargs={
                                                'worker_name': worker,
                                                'instance_number': i
                                            },
                                            daemon=True)
                worker_process.start()
                worker_processes.append(worker_process)

    @atexit.register
    def exit():
        # Every pid is handled on its own so that one dead process does not
        # stop the remaining workers from being killed.
        for pid in worker_pids:
            kill_quietly(pid)
        for process in worker_processes:
            logger.info("Shutting down worker process with pid: {} ...".format(
                process.pid))
            process.terminate()

        # NOTE(review): ``master`` is only bound once Arbiter(...).run()
        # returns -- confirm this hook can ever see a non-None value.
        if master is not None:
            master.halt()
        logger.info("Shutting down app updates...")
        app.shutdown_updates()
        logger.info("Finalizing config...")
        app.finalize_config()
        logger.info("Shutting down housekeeper updates...")
        if housekeeper is not None:
            housekeeper.shutdown_updates()

        # wait for the spawner and the worker threads to go down
        if manager is not None:
            manager.shutdown()
            # make sure the manager process itself is gone as well
            manager._process.terminate()

        # Prevent multiprocessing's atexit from conflicting with gunicorn
        logger.info("Killing main augur process with PID: {}".format(
            os.getpid()))
        os.kill(os.getpid(), 9)
        # Fallback in case the signal above does not end the process.
        os._exit(0)

    if enable_housekeeper:
        logger.info("Booting housekeeper...")
        jobs = app.read_config('Housekeeper', 'jobs', 'AUGUR_JOBS', [])
        try:
            housekeeper = Housekeeper(
                jobs,
                broker,
                broker_host=app.read_config('Server', 'host', 'AUGUR_HOST',
                                            'localhost'),
                broker_port=app.read_config('Server', 'port', 'AUGUR_PORT',
                                            '5000'),
                user=app.read_config('Database', 'user', 'AUGUR_DB_USER',
                                     'root'),
                password=app.read_config('Database', 'password',
                                         'AUGUR_DB_PASS', 'password'),
                host=app.read_config('Database', 'host', 'AUGUR_DB_HOST',
                                     '127.0.0.1'),
                port=app.read_config('Database', 'port', 'AUGUR_DB_PORT',
                                     '3306'),
                dbname=app.read_config('Database', 'database',
                                       'AUGUR_DB_NAME', 'msr14'))
        except KeyboardInterrupt:
            exit()

    host = app.read_config('Server', 'host', 'AUGUR_HOST', '0.0.0.0')
    port = app.read_config('Server', 'port', 'AUGUR_PORT', '5000')
    workers = int(
        app.read_config('Server', 'workers', 'AUGUR_WORKERS', mp.cpu_count()))
    options = {
        'bind': '%s:%s' % (host, port),
        'workers': workers,
        'accesslog': '-',
        'access_log_format': '%(h)s - %(t)s - %(r)s',
    }
    logger.info('Starting server...')
    master = Arbiter(
        AugurGunicornApp(options,
                         manager=manager,
                         broker=broker,
                         housekeeper=housekeeper)).run()
def worker_start(worker_name=None):
    """
    Log a boot message and launch the worker's start command in a shell.

    :param worker_name: name of the worker; used for the ``workers/<name>``
        directory and the ``<name>_start`` command
    """
    boot_message = "Booting {}".format(worker_name)
    logger.info(boot_message)

    start_command = "cd workers/{} && {}_start".format(worker_name,
                                                       worker_name)
    # Fire and forget: the spawned process is not waited on.
    subprocess.Popen(start_command, shell=True)
def cli(ctx, disable_housekeeper, skip_cleanup):
    """
    Start Augur's backend server

    :param ctx: context object; ``ctx.obj`` is the application and
        ``ctx.invoke`` is used to run the process cleanup
    :param disable_housekeeper: when false, previously started workers are
        killed, a ``Housekeeper`` is created and the configured workers are
        booted; the function returns early if no ``Workers`` config exists
    :param skip_cleanup: when true, the cleanup of old processes is skipped

    An ``atexit`` hook is registered that tears everything down and then
    kills the current process.
    """
    if not skip_cleanup:
        logger.info("Cleaning up old Augur processes. Just a moment please...")
        ctx.invoke(kill_processes)
        time.sleep(2)
    else:
        logger.info("Skipping cleanup processes.")

    def get_process_id(name):
        """Return process ids found by name or command
        """
        child = subprocess.Popen(['pgrep', '-f', name],
                                 stdout=subprocess.PIPE,
                                 shell=False)
        response = child.communicate()[0]
        return [int(pid) for pid in response.split()]

    def kill_quietly(pid):
        """Send SIGKILL to ``pid``; log instead of raising when that fails."""
        try:
            os.kill(pid, 9)
        except OSError:
            logger.info("Worker process {} already killed".format(pid))

    app = ctx.obj

    mp.set_start_method('forkserver', force=True)
    master = None
    housekeeper = None

    logger.info("Booting broker and its manager...")
    manager = mp.Manager()
    broker = manager.dict()

    controller = app.read_config('Workers')
    worker_pids = []
    worker_processes = []

    if not disable_housekeeper:
        if not controller:
            return
        for worker in controller.keys():
            if not controller[worker]['switch']:
                continue
            logger.info(
                "Your config has the option set to automatically boot {} instances of the {}"
                .format(controller[worker]['workers'], worker))
            pids = get_process_id(
                "/bin/sh -c cd workers/{} && {}_start".format(worker, worker))
            worker_pids += pids
            if len(pids) > 0:
                # presumably pid + 1 is the process started by the matched
                # shell -- TODO confirm
                worker_pids.append(pids[0] + 1)
                pids.append(pids[0] + 1)
                logger.info(
                    "Found and preparing to kill previous {} worker pids: {}".
                    format(worker, pids))
                for pid in pids:
                    kill_quietly(pid)

    @atexit.register
    def exit():
        # Every pid is handled on its own so that one dead process does not
        # stop the remaining workers from being killed.
        for pid in worker_pids:
            kill_quietly(pid)
        for process in worker_processes:
            logger.info("Shutting down worker process with pid: {} ...".format(
                process.pid))
            process.terminate()

        # NOTE(review): ``master`` is only bound once Arbiter(...).run()
        # returns -- confirm this hook can ever see a non-None value.
        if master is not None:
            master.halt()
        logger.info("Shutting down housekeeper updates...")
        if housekeeper is not None:
            housekeeper.shutdown_updates()

        # wait for the spawner and the worker threads to go down
        if manager is not None:
            manager.shutdown()
            # make sure the manager process itself is gone as well
            manager._process.terminate()

        # Prevent multiprocessing's atexit from conflicting with gunicorn
        logger.info("Killing main augur process with PID: {}".format(
            os.getpid()))
        os.kill(os.getpid(), 9)
        # Fallback in case the signal above does not end the process.
        os._exit(0)

    if not disable_housekeeper:
        logger.info("Booting housekeeper...")
        # Work on a copy so the jobs held by the config are left untouched.
        jobs = deepcopy(app.read_config('Housekeeper', 'jobs'))
        try:
            housekeeper = Housekeeper(
                jobs,
                broker,
                broker_host=app.read_config('Server', 'host'),
                broker_port=app.read_config('Server', 'port'),
                user=app.read_config('Database', 'user'),
                password=app.read_config('Database', 'password'),
                host=app.read_config('Database', 'host'),
                port=app.read_config('Database', 'port'),
                dbname=app.read_config('Database', 'name'))
        except KeyboardInterrupt:
            exit()

        logger.info("Housekeeper has finished booting.")

        if controller:
            for worker in controller.keys():
                if controller[worker]['switch']:
                    for i in range(controller[worker]['workers']):
                        logger.info("Booting {} #{}".format(worker, i + 1))
                        worker_process = mp.Process(
                            target=worker_start,
                            kwargs={
                                'worker_name': worker,
                                'instance_number': i,
                                'worker_port': controller[worker]['port']
                            },
                            daemon=True)
                        worker_process.start()
                        worker_processes.append(worker_process)

    host = app.read_config('Server', 'host')
    port = app.read_config('Server', 'port')
    workers = int(app.read_config('Server', 'workers'))
    timeout = int(app.read_config('Server', 'timeout'))
    options = {
        'bind': '%s:%s' % (host, port),
        'workers': workers,
        'accesslog': '-',
        'access_log_format': '%(h)s - %(t)s - %(r)s',
        'timeout': timeout
    }
    logger.info('Starting server...')
    master = Arbiter(
        AugurGunicornApp(options,
                         manager=manager,
                         broker=broker,
                         housekeeper=housekeeper)).run()