def main(): """Main function to start the Gunicorn WSGI.""" # Initialize key variables config = Config() # Make sure we have a database _ = connectivity() # Create agent object for web_proxy agent_gunicorn = Agent(PATTOO_API_AGENT_PROXY, config=config) # Create agent for daemon config = Config() agent_api = AgentAPI( PATTOO_API_AGENT_NAME, PATTOO_API_AGENT_PROXY, PATTOO_API_AGENT, config=config) # Add set API email address agent_api.set_api_email() # Set up encryption using Pgpier in Agent agent_api.set_gnupg() # Creation and retrieval of Pgpier object # Do control (API first, Gunicorn second) cli = AgentCLI() cli.control(agent_api) cli.control(agent_gunicorn)
def check_lockfile():
    """Delete lockfile if found and ingester is not running.

    Args:
        None

    Returns:
        running: True if ingester script is running

    """
    # Locate the ingester lockfile and check for a live process
    config = Config()
    lockfile = shared_files.lock_file('pattoo_ingester', config)
    running = sysinfo.process_running(PATTOO_INGESTER_SCRIPT)

    # A leftover lockfile with no running process indicates a possible
    # crash of a previous run. Remove the stale file and warn.
    stale = (not running) and os.path.exists(lockfile)
    if stale:
        os.remove(lockfile)
        log_message = ('''\
Lock file {} found, but the {} script is not running\
'''.format(lockfile, PATTOO_INGESTER_SCRIPT))
        log.log2warning(20030, log_message)

    return running
def main(): """Start the Gunicorn WSGI. Args: None Returns: None """ # Initialize key variables config = Config() # Make sure we have a database _ = connectivity() # Create agent object for web_proxy agent_gunicorn = Agent(PATTOO_API_WEB_PROXY, config=config) # Create agent for daemon agent_api = AgentAPI( PATTOO_API_WEB_NAME, PATTOO_API_WEB_PROXY, PATTOO_API_WEB, config=config) # Do control (API first, Gunicorn second) cli = AgentCLI() cli.control(agent_api) cli.control(agent_gunicorn)
def main(): """Process agent data. Args: None Returns: None """ # Initialize key variables use_mysql = True global POOL global URL pool_timeout = 30 pool_recycle = min(10, pool_timeout - 10) # Get configuration config = Config() # Define SQLAlchemy parameters from configuration pool_size = config.db_pool_size() max_overflow = config.db_max_overflow() # Create DB connection pool if use_mysql is True: URL = ('mysql+pymysql://{}:{}@{}/{}?charset=utf8mb4'.format( config.db_username(), config.db_password(), config.db_hostname(), config.db_name())) # Fix for multiprocessing on pools. # _add_engine_pidguard(QueuePool) # Add MySQL to the pool db_engine = create_engine(URL, echo=False, echo_pool=False, encoding='utf8', poolclass=QueuePool, max_overflow=max_overflow, pool_size=pool_size, pool_pre_ping=True, pool_recycle=pool_recycle, pool_timeout=pool_timeout) # Fix for multiprocessing on engines. # _add_engine_pidguard(db_engine) # Ensure connections are disposed before sharing engine. db_engine.dispose() # Create database session object POOL = scoped_session( sessionmaker(autoflush=True, autocommit=False, bind=db_engine)) else: POOL = None
def main(): """Main function to start the Gunicorn WSGI.""" # Initialize key variables config = Config() # Make sure we have a database _ = connectivity() # Create agent object for web_proxy agent_gunicorn = Agent(PATTOO_API_AGENT_PROXY, config=config) # Create agent for daemon config = Config() agent_api = AgentAPI(PATTOO_API_AGENT_NAME, PATTOO_API_AGENT_PROXY, PATTOO_API_AGENT, config=config) # Do control (API first, Gunicorn second) cli = AgentCLI() cli.control(agent_api) cli.control(agent_gunicorn)
def _mysql(): """Create database tables. Args: None Returns: None """ # Initialize key variables config = Config() pool_size = config.db_pool_size() max_overflow = config.db_max_overflow() # Add MySQL to the pool engine = create_engine(URL, echo=True, encoding='utf8', max_overflow=max_overflow, pool_size=pool_size, pool_recycle=3600) # Try to create the database print('??: Attempting to Connect to configured database.') try: sql_string = ('''\ ALTER DATABASE {} CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci\ '''.format(config.db_name())) engine.execute(sql_string) except: log_message = ('''\ ERROR: Cannot connect to database "{}" on server "{}". Verify database server \ is started. Verify database is created. Verify that the configured database \ authentication is correct.'''.format(config.db_name(), config.db_hostname())) log.log2die(20086, log_message) # Apply schemas print('OK: Database connected.') print('??: Attempting to create database tables.') BASE.metadata.create_all(engine) print('OK: Database tables created.')
def __init__(self, pattoo_db_records_lists):
    """Initialize the class.

    Args:
        pattoo_db_records_lists: List of PattooDBrecord object lists
            grouped by source and sorted by timestamp. This data is
            obtained from PattooShared.converter.extract

    Returns:
        None

    """
    # Read configuration
    config = Config()

    # Wrap each non-empty record list in a single-element tuple so it
    # can be handed to a worker as one positional argument.
    self._arguments = [
        (record_list,) for record_list in pattoo_db_records_lists
        if record_list]

    # Multiprocessing flag from configuration; pool sized to CPU count
    self._multiprocess = config.multiprocessing()
    self._pool_size = cpu_count()
def main(): """Start the pattoo ingester daemon. Args: None Returns: None """ # Initialize key variables config = Config() # Make sure we have a database _ = connectivity() # Poll agent_poller = PollingAgent(PATTOO_INGESTERD_NAME, config=config) # Do control cli = AgentCLI() cli.control(agent_poller, graceful=True)
def _lock(delete=False):
    """Create a lock file.

    Args:
        delete: Delete the file if true

    Returns:
        success: True if the requested lock operation succeeded

    """
    # Initialize key variables
    config = Config()
    lockfile = files.lock_file(PATTOO_INGESTER_NAME, config)
    success = False

    # Lock
    if bool(delete) is False:
        if os.path.exists(lockfile) is True:
            # Another instance appears to hold the lock; refuse to start.
            log_message = ('''\
Lockfile {} exists. Will not start ingester script. Is another Ingester \
instance running? If not, delete the lockfile and rerun this script.\
'''.format(lockfile))
            log.log2warning(20023, log_message)
        else:
            # Touch the lockfile to claim the lock
            with open(lockfile, 'a'):
                pass
            success = True
    else:
        if os.path.exists(lockfile) is True:
            try:
                os.remove(lockfile)
                success = True
            # Narrowed from a bare "except:", which would also have
            # trapped KeyboardInterrupt / SystemExit.
            except OSError:
                log_message = ('Error deleting lockfile {}.'.format(lockfile))
                log.log2warning(20107, log_message)
        else:
            log_message = ('Lockfile {} not found.'.format(lockfile))
            log.log2warning(20108, log_message)

    return success
def __init__(self, batch_size=500, age=0):
    """Initialize the class.

    Args:
        batch_size: Number of files to read
        age: Minimum age of files to be read per batch

    Returns:
        None

    """
    # Millisecond timestamp identifying this batch
    self._batch_id = int(time.time() * 1000)

    # Locate the agent cache directory
    config = Config()
    cache_directory = config.agent_cache_directory(PATTOO_API_AGENT_NAME)

    # Read data from cache. An empty result means nothing to ingest.
    self._data = files.read_json_files(
        cache_directory, die=False, age=age, count=batch_size)

    # Number of files that were read
    self.files = len(self._data)
PATTOO_API_WEB_REST_PREFIX = '{}/rest'.format(PATTOO_API_WEB_PREFIX) # Setup flask and secret key config PATTOO_API_WEB = Flask(__name__) # Import GraphQLAuth from Flask-GraphQL-Auth from flask_graphql_auth import GraphQLAuth ############################################################################### # # Setup Flask-GraphQL-Auth # ############################################################################### # Getting pattoo configuration and setting JWT secrete key config = Config() PATTOO_API_WEB.config['JWT_SECRET_KEY'] = config.jwt_secret_key() PATTOO_API_WEB.config['JWT_ACCESS_TOKEN_EXPIRES'] = config.acesss_token_exp() PATTOO_API_WEB.config['JWT_REFRESH_TOKEN_EXPIRES'] = config.refresh_token_exp() # Initialize authentication for pattoo api web flask app instance auth = GraphQLAuth(PATTOO_API_WEB) # Setup memcache. Required for all API imports CACHE = Cache(PATTOO_API_WEB, config={'CACHE_TYPE': 'simple'}) # Import PATTOO_API_WEB Blueprints (MUST be done after CACHE) from pattoo.api.web.graphql import GRAPHQL from pattoo.api.web.rest import REST_API_DATA from pattoo.api.web.status import API_STATUS
def process_cache(batch_size=500, max_duration=3600, fileage=10, script=False):
    """Ingest data.

    Args:
        batch_size: Number of files to process at a time
        max_duration: Maximum duration in seconds the ingest loop may run
        fileage: Minimum age of files to be processed in seconds
        script: True when invoked as a standalone script. Adds lockfile
            handling; the daemon has its own locking mechanism.

    Returns:
        success: True if successful

    Method:
        1) Read the files in the cache directory older than a threshold
        2) Process the data in the files
        3) Repeat, if new files are found that are older than the
           threshold, or we have been running too long.

        Batches of files are read to reduce the risk of overloading
        available memory, and ensure we can exit if we are running
        too long.

    """
    # Initialize key variables
    records = 0
    start = time.time()
    looptime = 0
    files_read = 0
    success = True

    # Get cache directory
    config = Config()
    directory = config.agent_cache_directory(PATTOO_API_AGENT_NAME)

    # Log what we are doing
    log_message = 'Processing ingest cache.'
    log.log2info(20085, log_message)

    # Count the JSON files present at the start. Used as an upper bound
    # so files arriving while we run cannot keep the loop going forever.
    files_found = len(
        [_ for _ in os.listdir(directory) if _.endswith('.json')])

    # Create lockfile only if running as a script.
    # The daemon has its own locking mechanism
    if bool(script) is True:
        success = _lock()
        if bool(success) is False:
            return bool(success)

    # Process the files in batches to reduce the database connection count
    # This can cause errors
    while True:
        # Agents constantly update files. We don't want an infinite loop
        # situation where we always have files available that are newer
        # than the desired fileage, so raise the age threshold by the
        # duration of the previous loop iteration.
        loopstart = time.time()
        fileage = fileage + looptime

        # Automatically stop if we are going on too long. (1 of 3)
        duration = loopstart - start
        if duration > max_duration:
            log_message = ('''\
Stopping ingester after exceeding the maximum runtime duration of {}s. \
This can be adjusted on the CLI.'''.format(max_duration))
            log.log2info(20022, log_message)
            break

        # Stop once we have read as many files as were present at the
        # start. (2 of 3) No need to log; this is an expected outcome.
        if files_read >= files_found:
            break

        # Read data from cache. Stop if there is no data found.
        cache = Cache(batch_size=batch_size, age=fileage)
        count = cache.ingest()

        # Stop when the batch read no files. (3 of 3)
        # No need to log; this is an expected outcome.
        if bool(cache.files) is False:
            break

        # Get the records processed, looptime and files read
        records += count
        files_read += cache.files
        looptime = max(time.time() - loopstart, looptime)

    # Print result
    duration = time.time() - start
    if bool(records) is True and bool(duration) is True:
        log_message = ('''\
Agent cache ingest completed. {0} records processed in {1:.2f} seconds, \
{2:.2f} records / second. {3} files read. \
'''.format(records, duration, records / duration, files_read))
        log.log2info(20084, log_message)
    else:
        log_message = 'No files found to ingest'
        log.log2info(20021, log_message)

    # Delete lockfile only if running as a script.
    # The daemon has its own locking mechanism
    if bool(script) is True:
        success = _lock(delete=True)

    # Log what we are doing
    log_message = 'Finished processing ingest cache.'
    log.log2info(20020, log_message)

    return bool(success)