def __init__(self):
    # Load the configuration for the filters
    try:
        filter_list_from_config = Config.filters()
    except Exception as e:
        log("ERROR", "Filter configuration error: " + str(e))
        sys.exit(1)  # Configuration errors should prevent execution

    # Create (and empty if needed) the base filter path
    self.filter_base_dir = os.path.join(Config.get('Saved_Log_Dir'), 'temp_filter_processing')
    if os.path.isdir(self.filter_base_dir):
        shutil.rmtree(self.filter_base_dir)

    # Create staging (all original files go here) and final (all filtered files end up here) directories
    os.makedirs(os.path.join(self.filter_base_dir, 'staging'), exist_ok=True)
    os.makedirs(os.path.join(self.filter_base_dir, 'final'), exist_ok=True)

    # Build the filter instance list (initialized here so append below cannot fail)
    self.instance_list = []
    for filter_from_config in filter_list_from_config:
        self.instance_list.append(
            FilterInstance(filter_from_config.get("name"),
                           filter_from_config.get("exe"),
                           filter_from_config.get("type"),
                           filter_from_config.get("timeout"),
                           self.filter_base_dir))

    log("INFO", str(len(filter_list_from_config)) + " valid filters have been found")
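# A minimal sketch of the filter-entry shape consumed above: Config.filters()
# returns a list of entries read with .get("name"/"exe"/"type"/"timeout"), so
# each behaves like a dict with those keys. The concrete values below are
# hypothetical and only illustrate the shape, not a real deployment.
#
# example_filter_entry = {
#     "name": "example_filter",         # display name (hypothetical)
#     "exe": "/path/to/filter_script",  # executable run per job (hypothetical)
#     "type": "OutErr",                 # filter type label (assumed value)
#     "timeout": 60                     # seconds allowed per run (assumed value)
# }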
def main():
    # Parse command-line arguments (if any)
    parser = argparse.ArgumentParser(
        description="GlideinMonitor's indexing script for GlideIn .out & .err files")
    parser.add_argument('-c', help="Path to Config File")
    parser.add_argument('-f', help="Ignore the lock file and force an index anyway",
                        action='store_true')
    args = parser.parse_args()

    # Process the config file
    Config.init(args.c)

    # Begin indexing
    begin_indexing(args)
def main():
    # Parse command-line arguments (if any)
    parser = argparse.ArgumentParser(
        description="GlideinMonitor's indexing script for GlideIn .out & .err files")
    parser.add_argument('-c', help="Path to Config File")
    parser.add_argument('-f', help="Ignore the lock file and force an index anyway",
                        action='store_true')
    args = parser.parse_args()

    # Process the config file
    Config.init(args.c)

    # Check for an index job lock
    lock_location = os.path.join(Config.get('Saved_Log_Dir'), "index_lock")
    if not pathlib.Path(lock_location).exists():
        try:
            os.mkdir(Config.get('Saved_Log_Dir'))
            log("INFO", "Creating new directory for index_lock")
        except FileExistsError:
            pass
        pathlib.Path(lock_location).touch()
    else:
        # A job index is already running or did not complete
        if not args.f:
            log("ERROR", "Lock file present in saved log directory")
            return

    # Connect to the database
    db = Database()

    # Get the list of job data that should be indexed
    job_index_list = determine_indexing(db)

    # Archive the original files
    archive_files(db, job_index_list)

    # Indexing & filtering complete
    db.commit()
    log("INFO", "Indexing Complete")

    # Delete the lock file
    os.remove(lock_location)
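# Usage sketch for the entry point above (script name and config path are
# hypothetical; the -c and -f flags match the parser):
#
#   python indexer.py -c /path/to/config.json       # normal indexing run
#   python indexer.py -c /path/to/config.json -f    # force a run despite a stale index_lock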
def log(error_level, message):
    # Honor the configured log level: NONE suppresses everything, ERROR keeps
    # only errors, and WARNING drops INFO messages
    if Config.get("Log_Level") == "NONE":
        return
    if Config.get("Log_Level") == "ERROR" and error_level != "ERROR":
        return
    if Config.get("Log_Level") == "WARNING" and error_level == "INFO":
        return

    # Write to the daily indexer log file
    log_location_dir = os.path.join(Config.get('Log_Dir'), 'indexer')
    if not os.path.exists(log_location_dir):
        os.makedirs(log_location_dir)
    log_location = os.path.join(log_location_dir,
                                datetime.datetime.now().strftime("%Y-%m-%d") + ".txt")
    with open(log_location, "a") as log_file:
        log_file.write(error_level + " - " +
                       str(int(datetime.datetime.now().timestamp())) + " - " +
                       message + "\n")
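# Each call above appends one line to <Log_Dir>/indexer/<YYYY-MM-DD>.txt in
# the form "<LEVEL> - <unix timestamp> - <message>", e.g. (timestamp value
# hypothetical):
#
#   INFO - 1700000000 - Begin Indexing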
def main():
    # Parse command-line arguments (if any)
    parser = argparse.ArgumentParser(description="GlideinMonitor's Flask Web Server")
    parser.add_argument('-c', help="Path to Config File")
    args = parser.parse_args()

    # Process the config file
    Config.init(args.c)

    # Redirect Flask output to a daily log file
    log_location_dir = os.path.join(Config.get('Log_Dir'), 'server')
    if not os.path.exists(log_location_dir):
        os.makedirs(log_location_dir)
    log_location = os.path.join(log_location_dir,
                                datetime.datetime.now().strftime("%Y-%m-%d") + ".txt")
    sys.stderr = open(log_location, "a")
    sys.stdout = open(log_location, "a")

    # Start the server
    app.run(host=Config.get('Host'), port=Config.get('Port'))
def archive_files(db, job_index_list):
    saved_dir_name = Config.get('Saved_Log_Dir')
    datetime_name = datetime.datetime.now().strftime("%Y-%m-%d")

    # Initialize the filter
    index_filter = Filter()

    for job_data in job_index_list:
        # Build the destination directories for the original and filtered archives
        final_dir_name_original = os.path.join(saved_dir_name, "original",
                                               job_data["instance_name"],
                                               job_data["frontend_user"], datetime_name)
        final_dir_name_filter = os.path.join(saved_dir_name, "filter",
                                             job_data["instance_name"],
                                             job_data["frontend_user"], datetime_name)

        # Create the directories if they do not exist
        os.makedirs(final_dir_name_original, exist_ok=True)
        os.makedirs(final_dir_name_filter, exist_ok=True)

        # Tar the output and error files
        save_file_name = (job_data["instance_name"] + "_" + job_data["entry_name"] +
                          "_" + job_data["job_id"] + ".tar.gz")
        file_path_original = os.path.join(final_dir_name_original, "original_" + save_file_name)
        file_path_filter = os.path.join(final_dir_name_filter, "filter_" + save_file_name)

        # Save the original immediately (the with block closes the tar on exit)
        with tarfile.open(file_path_original, "w:gz") as tar:
            tar.add(job_data["out_file_path"],
                    arcname=os.path.basename(job_data["out_file_path"]))
            tar.add(job_data["err_file_path"],
                    arcname=os.path.basename(job_data["err_file_path"]))

        # An archive of the original files now exists at file_path_original.
        # Add the job to the filter queue and give it the final destination
        # full path, file_path_filter
        index_filter.add_job(file_path_filter, job_data)

        # Add/update the job in the database
        db.add_job(job_data, file_path_original, file_path_filter)

    # Ensure all filters have finished running before the archive is considered complete
    while index_filter.filters_still_running():
        time.sleep(1)

    # Clean up the filter folders
    index_filter.cleanup()
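# Resulting layout under Saved_Log_Dir, following the path-building above
# (the <instance>/<frontend_user>/<entry>/<job_id> placeholders stand in for
# real job values):
#
#   <Saved_Log_Dir>/original/<instance>/<frontend_user>/<YYYY-MM-DD>/original_<instance>_<entry>_<job_id>.tar.gz
#   <Saved_Log_Dir>/filter/<instance>/<frontend_user>/<YYYY-MM-DD>/filter_<instance>_<entry>_<job_id>.tar.gz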
def api_job_file(job_id, given_guid):
    # Get the configured display type (which file variant to serve)
    file_type = Config.get('DisplayType')

    # Look up the job file itself
    db = Database()
    path = db.getFile(job_id, given_guid, file_type)
    db.quit()

    # If it's not found, send a 404
    if path is None:
        abort(404)

    return path
def begin_indexing(args):
    # Check for an index job lock
    lock_location = os.path.join(Config.get('Saved_Log_Dir'), "index_lock")
    if not pathlib.Path(lock_location).exists():
        pathlib.Path(lock_location).touch()
    else:
        # A job index is already running or did not complete
        if not args.f:
            log("ERROR", "Lock file present in saved log directory")
            return

    # Entry point for indexing
    db = Database()
    jobs_updated = 0
    saved_dir_name = Config.get('Saved_Log_Dir')
    datetime_name = datetime.datetime.now().strftime("%Y-%m-%d")
    log("INFO", "Begin Indexing")

    # Get a dictionary of jobs from the GWMS_Log_Dir directory
    tree = directory_jobs(Config.get('GWMS_Log_Dir'))
    log("INFO", "Directory Listing Completion")

    # Iterate through each job, checking whether the database needs an update
    for job_name, job_data in tree.items():
        # Skip entries that are missing an err/out file
        if "err_file_path" not in job_data or "out_file_path" not in job_data:
            log("INFO", "Missing ERR/OUT file for entry - jobID: " +
                job_data["entry_name"] + " - " + str(job_data["job_id"]))
            continue

        # Destination directory for this job's archive
        final_dir_name = os.path.join(saved_dir_name, job_data["instance_name"],
                                      job_data["frontend_user"], datetime_name)

        if db.needs_update(job_data):
            # Create the directory if it does not exist
            if not os.path.exists(final_dir_name):
                os.makedirs(final_dir_name)

            # Check whether the err file contains certain logs
            found_logs = {
                "MasterLog": False,
                "StartdLog": False,
                "StarterLog": False,
                "StartdHistoryLog": False,
                "glidein_activity": False
            }
            if job_data['err_file_size'] != 0:
                with open(job_data["err_file_path"], 'rb', 0) as file, \
                        mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
                    if s.find(b'MasterLog\n========') != -1:
                        found_logs["MasterLog"] = True
                    if s.find(b'StartdLog\n========') != -1:
                        found_logs["StartdLog"] = True
                    if s.find(b'StarterLog\n========') != -1:
                        found_logs["StarterLog"] = True
                    if s.find(b'StartdHistoryLog\n========') != -1:
                        found_logs["StartdHistoryLog"] = True
                    if s.find(b'=== Encoded XML description of glidein activity ===') != -1:
                        found_logs["glidein_activity"] = True

            # Tar the output and error files (the with block closes the tar on exit)
            curr_job_path = os.path.join(
                final_dir_name,
                job_name[0] + "_" + job_name[1] + "_" + job_name[2] + ".tar.gz")
            with tarfile.open(curr_job_path, "w:gz") as tar:
                tar.add(job_data["out_file_path"],
                        arcname=os.path.basename(job_data["out_file_path"]))
                tar.add(job_data["err_file_path"],
                        arcname=os.path.basename(job_data["err_file_path"]))

            # Add/update the job in the database
            db.add_job(job_data, curr_job_path, found_logs)

            # Job added/updated
            jobs_updated += 1

    # Indexing complete
    db.commit()

    # Delete the lock file
    os.remove(lock_location)

    log("INFO", "Jobs added/updated " + str(jobs_updated))
    log("INFO", "Indexing Complete")
def verify_password(username, password):
    # Compare the MD5 hex digest of the supplied password against the hash
    # stored in the 'Users' config map
    if username in Config.get('Users'):
        return Config.get('Users').get(username) == hashlib.md5(password.encode()).hexdigest()
    return False
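# A minimal sketch of generating the digest stored under the 'Users' config
# map so the comparison above succeeds; the password value is hypothetical,
# and MD5 is used only because the check above expects it:
#
#   >>> import hashlib
#   >>> hashlib.md5("example-password".encode()).hexdigest()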
def determine_indexing(db):
    # Entry point for indexing
    jobs_updated = 0
    log("INFO", "Begin Indexing")

    # Get a dictionary of jobs from the GWMS_Log_Dir directory
    tree = directory_jobs(Config.get('GWMS_Log_Dir'))
    log("INFO", "Directory Listing Completion")

    # List to be exported
    job_index_list = []

    # Iterate through each job, checking whether the database needs an update
    for job_name, job_data in tree.items():
        # Skip entries that are missing an '.err'/'.out' file
        if "err_file_path" not in job_data or "out_file_path" not in job_data:
            log("INFO", "Missing ERR/OUT file for entry - jobID: " +
                job_data["entry_name"] + " - " + str(job_data["job_id"]))
            continue

        if db.needs_update(job_data):
            # Check whether the err file contains certain logs
            found_logs = {
                "MasterLog": False,
                "StartdLog": False,
                "StarterLog": False,
                "StartdHistoryLog": False,
                "glidein_activity": False
            }
            if job_data['err_file_size'] != 0:
                with open(job_data["err_file_path"], 'rb', 0) as file, \
                        mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
                    if s.find(b'MasterLog\n========') != -1:
                        found_logs["MasterLog"] = True
                    if s.find(b'StartdLog\n========') != -1:
                        found_logs["StartdLog"] = True
                    if s.find(b'StarterLog\n========') != -1:
                        found_logs["StarterLog"] = True
                    if s.find(b'StartdHistoryLog\n========') != -1:
                        found_logs["StartdHistoryLog"] = True
                    if s.find(b'=== Encoded XML description of glidein activity ===') != -1:
                        found_logs["glidein_activity"] = True

            # Merge the found logs into the job data
            job_data.update(found_logs)

            # Add the job to the list to be indexed
            job_index_list.append(job_data)

            # Job added/updated
            jobs_updated += 1

    log("INFO", "Jobs to be added/updated " + str(jobs_updated))
    return job_index_list
def __init__(self):
    # Connect to SQLite unless specified otherwise in the config file
    if Config.db("type") == "sqlite":
        # SQLite database
        try:
            os.mkdir(Config.db("dir"))
            log("INFO", "Creating new directory for SQLite DB")
        except FileExistsError:
            pass
        self.conn = sqlite3.connect(os.path.join(Config.db("dir"),
                                                 "%s.sqlite" % Config.db("db_name")))

        # Check whether the index table exists
        db_cursor = self.conn.cursor()
        db_cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='file_index';")
        if db_cursor.fetchone() is None:
            # It doesn't; create it
            log("INFO", "Creating new SQLite database")
            with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   "sqliteTableCreation.sql"), 'r') as script_file:
                script = script_file.read()
            db_cursor.executescript(script)
    elif Config.db("type") == "mysql":
        # MySQL database
        if not MYSQL_AVAILABLE:
            log("ERROR", "MySQL database selected but there is no MySQL connector")
            raise ImportError("Module not found: mysql.connector")
        try:
            self.conn = mysql.connector.connect(
                host=Config.db("host"),
                user=Config.db("user"),
                passwd=Config.db("pass"),
                database=Config.db("db_name")
            )
            mycursor = self.conn.cursor()
        except mysql.connector.errors.ProgrammingError:
            # The database does not exist yet; create it
            log("INFO", "Creating new MySQL Database")
            mydb = mysql.connector.connect(
                host=Config.db("host"),
                user=Config.db("user"),
                passwd=Config.db("pass")
            )
            mycursor = mydb.cursor()
            mycursor.execute("CREATE DATABASE " + Config.db("db_name"))
            self.conn = mysql.connector.connect(
                host=Config.db("host"),
                user=Config.db("user"),
                passwd=Config.db("pass"),
                database=Config.db("db_name")
            )
            mycursor = self.conn.cursor()

        # Check whether the table exists
        mycursor.execute("SHOW TABLES")
        if ('file_index',) not in mycursor.fetchall():
            # Create the table
            log("INFO", "Creating MySQL File Index table")
            with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   "mysqlTableCreation.sql"), 'r') as script_file:
                script = script_file.read()
            mycursor.execute(script)
    else:
        log("ERROR", "No valid database selected (%s)" % Config.db("type"))
        raise ImportError("Invalid database type: %s" % Config.db("type"))
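# The branches above imply these database config keys, read via Config.db();
# the example values are hypothetical, not defaults:
#
#   sqlite: type, dir, db_name
#           e.g. {"type": "sqlite", "dir": "./db", "db_name": "glideinmonitor"}
#   mysql:  type, host, user, pass, db_name
#           e.g. {"type": "mysql", "host": "localhost", "user": "gm_user",
#                 "pass": "...", "db_name": "glideinmonitor"}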