def admin_login():
    """
    Login for an admin account
    """
    if g.admin is not None:
        return redirect(url_for('admin_home', admin_id=g.admin['project_id']))

    form = LoginForm(request.form)
    if form.validate_on_submit():
        # On submit, grab name & password
        project_name = form.project_name.data
        password = form.password.data

        # Try login
        db = DB()
        resp = db.auth(project_name, password)
        if resp['status'] and resp['admin']:
            session['admin_project_id'] = resp['project_id']
            admin_detail = db.get_project_detail(session['admin_project_id'])
            admin_id = admin_detail['project_id']
            return redirect(url_for('admin_home', admin_id=admin_id))
        elif not resp['admin']:
            flash('Invalid admin account!')
        else:
            flash(resp['message'])

    return render_template('admin_login.html', form=form)
def login():
    """
    Handles project account authentication
    """
    if g.project is not None:
        return redirect(url_for('home', project_name=g.project['project_name']))

    form = LoginForm(request.form)
    if form.validate_on_submit():
        # On submit, grab name & password
        project_name = form.project_name.data
        password = form.password.data

        # Try login
        db = DB()
        resp = db.auth(project_name, password)
        if resp['status']:
            session['project_id'] = resp['project_id']
            project_detail = db.get_project_detail(session['project_id'])
            project_name = project_detail['project_name']
            return redirect(url_for('home', project_name=project_name))
        else:
            flash(resp['message'])

    return render_template('login.html', form=form)
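# Hedged sketch of the LoginForm both views above expect: a Flask-WTF form
# exposing `project_name` and `password` and supporting validate_on_submit().
# The field types and validators are assumptions inferred from usage, not
# taken from this codebase.
from flask_wtf import FlaskForm
from wtforms import PasswordField, StringField
from wtforms.validators import DataRequired


class LoginForm(FlaskForm):
    project_name = StringField('Project Name', validators=[DataRequired()])
    password = PasswordField('Password', validators=[DataRequired()])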
def decorated_function(*args, **kwargs):
    g.admin = None
    if 'admin_project_id' in session:
        db = DB()
        resp = db.get_project_detail(session['admin_project_id'])
        if resp['status']:
            g.admin = resp
    return f(*args, **kwargs)
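# Hedged sketch of the decorator presumed to wrap decorated_function() above,
# so views can require a loaded admin. The name `load_admin` and the use of
# functools.wraps are illustrative assumptions; only the inner function
# appears in this section.
from functools import wraps


def load_admin(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Populate g.admin from the session before calling the view
        # (body as defined above)
        g.admin = None
        if 'admin_project_id' in session:
            db = DB()
            resp = db.get_project_detail(session['admin_project_id'])
            if resp['status']:
                g.admin = resp
        return f(*args, **kwargs)
    return decorated_function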
def _aload_project(project_name):
    """
    Utility method to load an admin project detail if an admin is viewing
    their control page
    """
    db = DB()
    resp = db.stack_config.find_one({'project_name': project_name})
    g.project = db.get_project_detail(str(resp['_id']))
    session['project_id'] = str(resp['_id'])
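# Hedged usage sketch: _aload_project() is presumably invoked from a view or
# a before_request hook when an admin opens a project's control page. The
# route and view name below are illustrative assumptions.
@app.route('/admin/<project_name>')
def admin_project_page(project_name):
    _aload_project(project_name)
    return redirect(url_for('home', project_name=g.project['project_name']))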
class Controller(object):
    """
    Controller - A class for controlling STACK processes.
    Calls the Process() class to start and stop STACK processes.
    """

    def __init__(self, process, cmdline=False, home_dir='.', umask=0o22, verbose=1, **kwargs):
        self.db = DB()
        self.process = process
        self.cmdline = cmdline
        self.usage_message = 'controller collect|process|insert start|stop|restart project_id collector_id'
        self.home_dir = home_dir
        self.umask = umask
        self.verbose = verbose

        if self.cmdline is False:
            # Grab information from Flask user object
            self.project = kwargs['project']
            self.project_id = self.project['project_id']
            self.project_name = self.project['project_name']
        else:
            # Command is coming from the command line, look up info
            self.project_id = kwargs['project_id']
            resp = self.db.get_project_detail(self.project_id)
            if resp['status']:
                self.project_name = resp['project_name']
            else:
                print('Project w/ ID %s not found!' % self.project_id)
                print('')
                print('USAGE: python %s %s' % (sys.argv[0], self.usage_message))
                sys.exit(1)

        # Project account DB connection
        project_info = self.db.get_project_detail(self.project_id)
        configdb = project_info['project_config_db']
        project_config_db = self.db.connection[configdb]
        self.projectdb = project_config_db.config

        # Loads info for process based on type: collector, processor, inserter
        if self.process in ['process', 'insert']:
            # Only module type needed for processor / inserter
            self.module = kwargs['network']
            self.collector_id = None

            # Set name for worker based on gathered info
            self.process_name = self.project_name + '-' + self.process + '-' + self.module + '-' + self.project_id
        elif process == 'collect':
            # For collectors, also grabs: collector_id, api, collector_name
            self.collector_id = kwargs['collector_id']
            resp = self.db.get_collector_detail(self.project_id, self.collector_id)
            if resp['status']:
                collector = resp['collector']
                self.module = collector['network']
                self.api = collector['api']
                self.collector_name = collector['collector_name']
            else:
                print('Collector (ID: %s) not found!' % self.collector_id)
                print('')
                print('USAGE: python %s %s' % (sys.argv[0], self.usage_message))
                sys.exit(1)

            # Set name for worker based on gathered info
            self.process_name = self.project_name + '-' + self.collector_name + '-' + self.process + '-' + \
                self.module + '-' + self.collector_id

        # Sets out directories
        self.piddir = app.config['LOGDIR'] + '/' + self.project_name + '-' + self.project_id + '/pid'
        self.logdir = app.config['LOGDIR'] + '/' + self.project_name + '-' + self.project_id + '/logs'
        self.stddir = app.config['LOGDIR'] + '/' + self.project_name + '-' + self.project_id + '/std'

        # Sets data dirs
        # TODO - deprecate w/ Facebook
        self.rawdir = app.config['DATADIR'] + '/' + self.project_name + '-' + self.project_id + '/' + \
            self.module + '/raw'
        self.archdir = app.config['DATADIR'] + '/' + self.project_name + '-' + self.project_id + '/' + \
            self.module + '/archive'
        self.insertdir = app.config['DATADIR'] + '/' + self.project_name + '-' + self.project_id + '/' + \
            self.module + '/insert_queue'

        # Creates dirs if they don't already exist
        if not os.path.exists(self.piddir):
            os.makedirs(self.piddir)
        if not os.path.exists(self.stddir):
            os.makedirs(self.stddir)

        # These directories only need to be created for Twitter
        # TODO - deprecate w/ Facebook
        if self.module == 'twitter':
            if not os.path.exists(self.logdir):
                os.makedirs(self.logdir)
            if not os.path.exists(self.rawdir):
                os.makedirs(self.rawdir)
            if not os.path.exists(self.archdir):
                os.makedirs(self.archdir)
            if not os.path.exists(self.insertdir):
                os.makedirs(self.insertdir)

        # Sets outfiles
        self.pidfile = self.piddir + '/%s.pid' % self.process_name
        self.stdout = self.stddir + '/%s-stdout.txt' % self.process_name
        self.stderr = self.stddir + '/%s-stderr.txt' % self.process_name
        self.stdin = self.stddir + '/%s-stdin.txt' % self.process_name

        # Creates the std files for the daemon
        if not os.path.isfile(self.stdout):
            create_file = open(self.stdout, 'w')
            create_file.close()
        if not os.path.isfile(self.stdin):
            create_file = open(self.stdin, 'w')
            create_file.close()
        if not os.path.isfile(self.stderr):
            create_file = open(self.stderr, 'w')
            create_file.close()

    def process_command(self, cmd):
        """
        Parses the passed command (start / stop / restart) and initiates daemonization
        """
        # Makes sure the command is relevant
        if self.cmdline and cmd not in ['start', 'stop', 'restart']:
            print('Invalid command: %s' % cmd)
            print('')
            print('USAGE: python %s %s' % (sys.argv[0], self.usage_message))
            sys.exit(1)
        elif cmd == 'start':
            self.start()
        elif cmd == 'stop':
            self.stop()
        elif cmd == 'restart':
            self.restart()
        else:
            print('USAGE: python %s %s' % (sys.argv[0], self.usage_message))
            if self.cmdline:
                sys.exit(1)

    def start(self):
        """
        Method that starts the daemon process
        """
        print('Initializing the STACK daemon: %s' % self.process_name)

        # Sets flags for given process
        resp = ''
        if self.process == 'collect':
            resp = self.db.set_collector_status(self.project_id, self.collector_id, collector_status=1)
        elif self.process == 'process':
            resp = self.db.set_network_status(self.project_id, self.module, run=1, process=True)
        elif self.process == 'insert':
            resp = self.db.set_network_status(self.project_id, self.module, run=1, insert=True)
        if 'status' in resp and resp['status']:
            print('Flags set.')

            # Check to see if running based on pidfile
            pid = self.get_pid()
            if pid:
                message = "pidfile %s already exists. Is it already running?\n"
                sys.stderr.write(message % self.pidfile)
                sys.exit(1)

            # Start the daemon
            self.daemonize()
            self.run()
        else:
            print('Failed to successfully set flags, try again.')

    def stop(self):
        """
        Method that sets flags and stops the daemon process
        """
        print('Stop command received.')
        print('Step 1) Setting flags on the STACK process to stop.')

        if self.process == 'collect':
            # Set flags for the STACK process to stop
            resp = self.db.set_collector_status(self.project_id, self.collector_id, collector_status=0)

            # Grab active flag from collector's Mongo document
            collector_conf = self.projectdb.find_one({'_id': ObjectId(self.collector_id)})
            active = collector_conf['active']
        else:
            module_conf = self.projectdb.find_one({'module': self.module})
            if self.process == 'process':
                resp = self.db.set_network_status(self.project_id, self.module, run=0, process=True)
                active = module_conf['processor_active']
            else:
                resp = self.db.set_network_status(self.project_id, self.module, run=0, insert=True)
                active = module_conf['inserter_active']

        # TODO - mongo error handling
        if resp['status']:
            print('Step 1 complete.')

        # If the daemon has already stopped, then set flags and break
        pid = self.get_pid()
        if not pid:
            print("STACK daemon already terminated.")

            # Extra clean up, just in case
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
            if self.process in ['process', 'insert']:
                if self.process == 'process':
                    self.projectdb.update({'module': self.module}, {'$set': {'processor_active': 0}})
                else:
                    self.projectdb.update({'module': self.module}, {'$set': {'inserter_active': 0}})
            else:
                self.projectdb.update({'_id': ObjectId(self.collector_id)}, {'$set': {'active': 0}})

            return

        # Step 2) Check for task / STACK process completion; loops through 15 times to check
        print('Step 2) Check for STACK process completion and shutdown the daemon.')

        wait_count = 0
        while active == 1:
            wait_count += 1

            if self.process in ['process', 'insert']:
                module_conf = self.projectdb.find_one({'module': self.module})
                if self.process == 'process':
                    active = module_conf['processor_active']
                else:
                    active = module_conf['inserter_active']
            else:
                collector_conf = self.projectdb.find_one({'_id': ObjectId(self.collector_id)})
                active = collector_conf['active']

            print('Try %d / 15' % wait_count)
            print('Active Status: %d' % active)
            print('Trying again in 5 seconds.')
            print('')

            if wait_count > 15:
                break

            time.sleep(5)

        # Get the pid from the pidfile
        pid = self.get_pid()
        if not pid:
            print("Daemon successfully stopped via thread termination.")

            # Just to be sure. A ValueError might occur if the PID file is
            # empty but does actually exist
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)

            return  # Not an error in a restart

        # Try killing the daemon process
        print('Daemon still running w/ loose thread. Stopping now...')
        try:
            i = 0
            while 1:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
                i = i + 1
                if i % 10 == 0:
                    os.kill(pid, signal.SIGHUP)
        except OSError as err:
            err = str(err)
            if err.find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print(str(err))
                sys.exit(1)

        # Had to kill the daemon, so set the active status flag accordingly.
        if self.process in ['process', 'insert']:
            if self.process == 'process':
                self.projectdb.update({'module': self.module}, {'$set': {'processor_active': 0}})
            else:
                self.projectdb.update({'module': self.module}, {'$set': {'inserter_active': 0}})
        else:
            self.projectdb.update({'_id': ObjectId(self.collector_id)}, {'$set': {'active': 0}})

        print('Stopped.')

    def restart(self):
        """
        Simple restart of the daemon
        """
        # TODO - restart w/out shutting down daemon as part of extensible processor modules
        self.stop()
        self.start()

    def run(self):
        """
        Calls the process logic scripts and runs
        """
        # Backwards compatibility for older Twitter scripts
        if self.module == 'twitter':
            if self.process == 'collect':
                ThreadedCollector.go(self.api, self.project_id, self.collector_id, self.rawdir, self.logdir)
            elif self.process == 'process':
                preprocess.go(self.project_id, self.rawdir, self.archdir, self.insertdir, self.logdir)
            elif self.process == 'insert':
                mongoBatchInsert.go(self.project_id, self.rawdir, self.insertdir, self.logdir)
        # New approach via extensible collectors
        else:
            # Dynamically imports the worker class from the network module
            # (level 0 = absolute import; the original -1 is Python 2 only)
            os.chdir(app.config['BASEDIR'])
            if self.process == 'collect':
                _temp = __import__('app.%s.collect' % self.module, globals(), locals(), ['Collector'], 0)
                Collector = _temp.Collector
                c = Collector(self.project_id, self.collector_id, self.process_name)
                c.go()
            elif self.process == 'process':
                _temp = __import__('app.%s.process' % self.module, globals(), locals(), ['Processor'], 0)
                Processor = _temp.Processor
                c = Processor(self.project_id, self.process_name, self.module)
                c.go()
            elif self.process == 'insert':
                _temp = __import__('app.%s.insert' % self.module, globals(), locals(), ['Inserter'], 0)
                Inserter = _temp.Inserter
                c = Inserter(self.project_id, self.process_name, self.module)
                c.go()

    def daemonize(self):
        """
        Do the UNIX double-fork magic, see Stevens' "Advanced Programming in
        the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        try:
            pid = os.fork()
            if pid > 0:
                # Exit first parent
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

        # Decouple from parent environment
        os.chdir(self.home_dir)
        os.setsid()
        os.umask(self.umask)

        # Do second fork
        try:
            pid = os.fork()
            if pid > 0:
                # Exit from second parent
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

        sys.stdout.flush()
        sys.stderr.flush()
        si = open(self.stdin, 'r+')
        so = open(self.stdout, 'a+')
        if self.stderr:
            se = open(self.stderr, 'a+')
        else:
            se = so
        if self.cmdline:
            os.dup2(si.fileno(), sys.stdin.fileno())
            os.dup2(so.fileno(), sys.stdout.fileno())
            os.dup2(se.fileno(), sys.stderr.fileno())

        sys.stderr.flush()
        sys.stdout.flush()

        def sigtermhandler(signum, frame):
            self.daemon_alive = False

        signal.signal(signal.SIGTERM, sigtermhandler)
        signal.signal(signal.SIGINT, sigtermhandler)

        if self.verbose >= 1:
            print("Started")

        # Write pidfile
        atexit.register(self.delpid)  # Make sure pid file is removed if we quit
        pid = str(os.getpid())
        open(self.pidfile, 'w+').write("%s\n" % pid)

    def delpid(self):
        os.remove(self.pidfile)

    def get_pid(self):
        try:
            pf = open(self.pidfile, 'r')
            pid = int(pf.read().strip())
            pf.close()
        except IOError:
            pid = None
        except SystemExit:
            pid = None
        return pid
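# Hedged sketch of a command-line entry point matching self.usage_message
# ('controller collect|process|insert start|stop|restart project_id
# collector_id'). The argument handling below is illustrative; the real
# entry point may differ.
if __name__ == '__main__':
    process_type, command, project_id = sys.argv[1], sys.argv[2], sys.argv[3]
    kwargs = {'project_id': project_id}
    if process_type == 'collect':
        kwargs['collector_id'] = sys.argv[4]
    else:
        # Processors / inserters are keyed by network module, e.g. 'twitter'
        kwargs['network'] = sys.argv[4]
    Controller(process_type, cmdline=True, **kwargs).process_command(command)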
class BaseInserter(object):
    """
    Extensible base class for all STACK inserters

    NOTE - when extending, must initiate connections to network specific data directories!
    """

    def __init__(self, project_id, process_name, network):
        self.project_id = project_id
        self.process_name = process_name
        self.network = network

        # Sets up connection w/ project config DB & loads in collector info
        self.db = DB()
        project = self.db.get_project_detail(self.project_id)
        self.project_name = project['project_name']

        # Grabs connection to project config DB
        configdb = project['project_config_db']
        project_db = self.db.connection[configdb]
        self.project_db = project_db.config

        # Grabs connection to insertion DB
        # NOTE - on init, need to connect to appropriate network collection
        db_name = self.project_name + '_' + self.project_id
        self.insert_db = self.db.connection[db_name]

        # Sets up logdir and logging
        logdir = app.config['LOGDIR'] + '/' + self.project_name + '-' + self.project_id + '/logs'
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        # Sets logger w/ name 'Inserter' and level INFO
        self.logger = logging.getLogger('Inserter')
        self.logger.setLevel(logging.INFO)

        # Sets up logging file handler
        logfile = logdir + '/%s.log' % self.process_name
        # TODO - port logging rotation params to Mongo for user control later / these default values good
        handler = logging.handlers.TimedRotatingFileHandler(logfile, when='D', backupCount=30)
        handler.setLevel(logging.INFO)

        # Formats
        format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
        dateformat = '%m-%d %H:%M'
        formatter = logging.Formatter(format, dateformat)
        handler.setFormatter(formatter)

        # Adds handler to logger to finish
        self.logger.addHandler(handler)

        self.log('STACK inserter for project %s initiated.' % self.project_name)

        # Sets up data directory
        self.datadir = app.config['DATADIR'] + '/' + self.project_name + '-' + self.project_id

        # Establish connections to data directories
        self.raw = self.datadir + '/' + self.network + '/raw'
        self.archive = self.datadir + '/' + self.network + '/archive'
        self.queue = self.datadir + '/' + self.network + '/queue'
        self.error = self.datadir + '/' + self.network + '/error'

        if not os.path.exists(self.raw):
            os.makedirs(self.raw)
        if not os.path.exists(self.archive):
            os.makedirs(self.archive)
        if not os.path.exists(self.queue):
            os.makedirs(self.queue)
        if not os.path.exists(self.error):
            os.makedirs(self.error)

        self.log('STACK inserter setup completed. Now starting...')

    def go(self):
        """
        Runs the inserter
        """
        self.run_flag = self.check_flags()['run']
        self.restart_flag = 0

        if self.run_flag:
            self.log('Starting inserter %s with signal %d' % (self.process_name, self.run_flag))
            self.set_active(1)

        while self.run_flag:
            # Call function to process files
            self.insert()

            # Lastly, see if the run status has changed
            try:
                flags = self.check_flags()
                self.run_flag = flags['run']
                self.restart_flag = flags['restart']
            except Exception as e:
                self.log('Mongo connection refused with exception when attempting to check flags: %s' % e,
                         level='warn')
                self.log('Will keep running until the connection is re-established.', level='warn')

        # Clean up upon run loop conclusion
        self.log('Exiting inserter.')
        self.set_active(0)

    def log(self, message, level='info', thread='MAIN:'):
        """
        Logs messages to process logfile
        """
        message = str(message)
        if level == 'warn':
            self.logger.warning(thread + ' ' + message)
        elif level == 'error':
            self.logger.error(thread + ' ' + message)
        else:
            self.logger.info(thread + ' ' + message)

    def check_flags(self):
        """
        Quick method to grab and return all Mongo flags for given Inserter instance
        """
        resp = self.project_db.find_one({'module': self.network})
        return {
            'run': resp['inserter']['run'],
            'restart': resp['inserter']['restart']
        }

    def set_active(self, active):
        """
        Quick method to set the active flag to 1 or 0
        """
        self.project_db.update({'module': self.network}, {'$set': {'inserter_active': active}})

    def insert(self):
        """
        Override this method when extending the class to implement
        network-specific insertion logic.
        """
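# Hedged sketch of a network-specific subclass, per Controller.run() above,
# which imports an `Inserter` class from app.<network>.insert. The insert()
# body (line-delimited JSON files drained from the queue dir into Mongo,
# then archived) is an illustrative assumption, not code from this repo.
import json


class Inserter(BaseInserter):
    def insert(self):
        for fname in os.listdir(self.queue):
            path = os.path.join(self.queue, fname)
            with open(path) as f:
                docs = [json.loads(line) for line in f if line.strip()]
            if docs:
                # One Mongo collection per network inside the project's DB
                self.insert_db[self.network].insert_many(docs)
            os.rename(path, os.path.join(self.archive, fname))
            self.log('Inserted %d docs from %s' % (len(docs), fname))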
class BaseCollector(object):
    """
    Extensible base class for all STACK collectors
    """

    def __init__(self, project_id, collector_id, process_name):
        self.project_id = project_id
        self.collector_id = collector_id
        self.process_name = process_name
        self.collecting_data = False

        # Sets up connection w/ project config DB & loads in collector info
        self.db = DB()
        project = self.db.get_project_detail(self.project_id)
        if project['status']:
            self.project_name = project['project_name']
            configdb = project['project_config_db']
            project_db = self.db.connection[configdb]
            self.project_db = project_db.config

        resp = self.db.get_collector_detail(self.project_id, self.collector_id)
        if resp['status']:
            collector_info = resp['collector']

            # Load in collector info
            self.collector_name = collector_info['collector_name']
            self.network = collector_info['network']
            self.api = collector_info['api']
            self.collection_type = collector_info['collection_type']
            self.params = collector_info['params']
            self.terms_list = collector_info['terms_list']
            self.languages = collector_info['languages']
            self.locations = collector_info['location']
            self.auth = collector_info['api_auth']

        # TODO - file format to Mongo
        # TODO - less than hour = warning
        self.file_format = '%Y%m%d-%H'

        # If this is a streaming collector
        if self.collection_type == 'realtime':
            self.project_db.update({'_id': ObjectId(self.collector_id)},
                                   {'$set': {'stream_limits': []}})

        # Sets up logdir and logging
        logdir = app.config['LOGDIR'] + '/' + self.project_name + '-' + self.project_id + '/logs'
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        # Sets logger w/ name collector_name and level INFO
        self.logger = logging.getLogger(self.collector_name)
        self.logger.setLevel(logging.INFO)

        # Sets up logging file handler
        logfile = logdir + '/%s.log' % self.process_name
        # TODO - logging params
        # TODO - port logging rotation params to Mongo for user control later / these default values good
        handler = logging.handlers.TimedRotatingFileHandler(logfile, when='D', backupCount=30)
        handler.setLevel(logging.INFO)

        # Formats
        format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
        dateformat = '%m-%d %H:%M'
        formatter = logging.Formatter(format, dateformat)
        handler.setFormatter(formatter)

        # Adds handler to logger to finish
        self.logger.addHandler(handler)

        self.log('STACK collector %s initiated.' % self.collector_name)

        # Sets up rawdir
        self.rawdir = app.config['DATADIR'] + '/' + self.project_name + '-' + self.project_id + '/' + \
            self.network + '/raw'
        if not os.path.exists(self.rawdir):
            os.makedirs(self.rawdir)

        self.log('All raw files and directories set. Now starting collector...')

    def go(self):
        """
        Starts and maintains the loop that monitors the collection thread.
        Threads are maintained in the extended versions of the class
        """
        # Checks if we're supposed to be running
        self.run_flag = self.check_flags()['run']
        self.collect_flag = 0
        self.update_flag = 0

        if self.run_flag:
            self.log('Starting collector %s with signal %d' % (self.process_name, self.run_flag))
            self.set_active(1)

        # If run_flag is set - begin the loop
        while self.run_flag:
            try:
                flags = self.check_flags()
                self.run_flag = flags['run']
                self.collect_flag = flags['collect']
                self.update_flag = flags['update']
            except Exception as e:
                self.log('Mongo connection refused with exception: %s' % e, level='warn')

            # If we've been flagged to stop or update and we're collecting - shut it down
            if self.collecting_data and (self.update_flag or not self.collect_flag or not self.run_flag):
                self.stop_thread()

            # If we've been flagged to start and we're not collecting - start it up
            if self.collect_flag and threading.activeCount() == 1:
                self.start_thread()

            time.sleep(2)

        self.log('Exiting collection.')
        self.set_active(0)

    def write(self, data):
        """
        Called to write raw data to raw file - handles rotation
        """
        timestr = time.strftime(self.file_format)
        filename = self.rawdir + '/' + timestr + '-' + self.collector_name + '-' + self.collector_id + '-out.json'
        if not os.path.isfile(filename):
            self.log('Creating new raw file: %s' % filename)

        with open(filename, 'a') as rawfile:
            # Write text, not bytes - calling .encode('utf-8') here raises
            # TypeError on a text-mode file under Python 3
            rawfile.write(json.dumps(data))
            rawfile.write('\n')

    def log(self, message, level='info', thread='MAIN:'):
        """
        Logs messages to process logfile
        """
        message = str(message)
        if level == 'warn':
            self.logger.warning(thread + ' ' + message)
        elif level == 'error':
            self.logger.error(thread + ' ' + message)
        else:
            self.logger.info(thread + ' ' + message)

    def check_flags(self):
        """
        Quick method to grab and return all Mongo flags for given Collector instance
        """
        resp = self.db.get_collector_detail(self.project_id, self.collector_id)
        collector = resp['collector']
        return {
            'run': collector['collector']['run'],
            'collect': collector['collector']['collect'],
            'update': collector['collector']['update'],
            'active': collector['active']
        }

    def set_active(self, active):
        """
        Quick method to set the active flag to 1 or 0
        """
        self.project_db.update({'_id': ObjectId(self.collector_id)}, {'$set': {'active': active}})

    def start_thread(self):
        """
        Modify this method when extending the class to manage the actual collection thread
        """

    def stop_thread(self):
        """
        Modify this method when extending the class to stop the collection thread
        """
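# Hedged sketch of the thread hooks a concrete collector might implement,
# matching Controller.run()'s import of a `Collector` class from
# app.<network>.collect. The `collect()` target and the Event-based shutdown
# are illustrative assumptions.
class Collector(BaseCollector):
    def start_thread(self):
        self.collecting_data = True
        self.stop_event = threading.Event()
        self.thread = threading.Thread(target=self.collect, args=(self.stop_event,))
        self.thread.start()

    def stop_thread(self):
        self.stop_event.set()  # ask the collection loop to wind down
        self.thread.join()
        self.collecting_data = False

    def collect(self, stop_event):
        # Poll the network API and persist raw results until told to stop
        while not stop_event.is_set():
            data = {'placeholder': True}  # illustrative payload
            self.write(data)
            time.sleep(5)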
        resp = db.get_project_list()
        print(json.dumps(resp, indent=1))
    elif method == 'get_collector_ids':
        """
        python __main__.py db get_collector_ids project_id
        """
        project_id = sys.argv[3]
        resp = db.get_collector_ids(project_id)
        print(json.dumps(resp, indent=1))
    elif method == 'get_project_detail':
        """
        python __main__.py db get_project_detail project_id
        """
        project_id = sys.argv[3]
        resp = db.get_project_detail(project_id)
        print(json.dumps(resp, indent=1))
    elif method == 'get_collector_detail':
        """
        python __main__.py db get_collector_detail project_id collector_id
        """
        project_id = sys.argv[3]
        collector_id = sys.argv[4]
        resp = db.get_collector_detail(project_id, collector_id)
        print(json.dumps(resp, indent=1))
    elif method == 'get_network_detail':
        """
        python __main__.py db get_network_detail project_id network
        """
        project_id = sys.argv[3]
        network = sys.argv[4]