Example #1
def logger_Start(log_filename, verboseMode):
    global logger, logger_stop_event, logger_process, logger_config_listener

    logger_config_listener = {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'detailed': {
                'class': 'logging.Formatter',
                'format': '%(asctime)s %(name)-16s %(levelname)-8s %(processName)-10s %(message)s'
            },
            'simple': {
                'class': 'logging.Formatter',
                'format': '%(asctime)s %(levelname)-8s %(message)s'
            }
        },
        'handlers': {
            'console': {
                'class': 'logging.StreamHandler',
                'level': 'INFO',
                'formatter': 'simple',
            },
            'file': {
                'class': 'logging.FileHandler',
                'filename': log_filename + '.log',
                'mode': 'w',
                'formatter': 'detailed',
            },
            'errors': {
                'class': 'logging.FileHandler',
                'filename': log_filename + '-errors.log',
                'mode': 'w',
                'level': 'ERROR',
                'formatter': 'detailed',
            },
        },
        'root': {
            'level': 'DEBUG',
            'handlers': ['console', 'file', 'errors']
        },
    }

    # Note: logger_config_initial is a module-level dict (not shown in this
    # excerpt) that typically routes all records through a QueueHandler into
    # logger_queue; logger_config_listener above is applied by the listener.
    logging.config.dictConfig(logger_config_initial)
    logger = logging.getLogger()

    # Create logger listener
    logger_stop_event = Event()
    logger_process = Process(target=logger_listener_process, name='listener', args=(logger_queue, logger_stop_event, logger_config_listener))
    logger_process.start()
    if verboseMode:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    logger.info("-------------------------------Log started-------------------------------")
    logger.debug("Python version: %s" % ''.join((sys.version).splitlines()))
    from appAux import psutil_phymem_usage
    logger.debug("Physical mem used: %d%%" % psutil_phymem_usage())
Example #2
    def rebalance(self):
        if self.governorOffFlag:
            return
        progProducers = self.getProgressProducers()
        progConsumers = self.getProgressConsumers()
        num_tasks = self.get_num_tasks()
        elapsed_backoff_time = (datetime.now() -
                                self._rebalance_last_kick).seconds

        logger.debug("Starting balancing (timer: %d/%d)" %
                     (elapsed_backoff_time, self.rebalance_backoff_timer))

        # Kill producers if all tasks have been served
        if num_tasks == progProducers and progProducers > 0:
            self.endProducers()
            return

        # Restart paused production when the backoff timer expires or at least 20% of physical memory is free
        if self.num_producers == 0 and (elapsed_backoff_time > self.rebalance_backoff_timer
                                        or psutil_phymem_usage() < 80):
            logger.debug("Rebalancing, restarting production")
            self.addProducer()
            return

        # Memory governor
        # Pause production if we're over 90%
        if psutil_phymem_usage() > 90:
            logger.debug("Rebalancing, mem > 90%, pausing production")
            self.endProducers()
            self._rebalance_last_kick = datetime.now()
            return
        # Reduce production if we're over 75%
        if psutil_phymem_usage() > 75 and self.num_producers > 1:
            if (datetime.now() - self._rebalance_mem_last_kick).seconds > self.rebalance_mem_backoff_timer:
                logger.debug("Rebalancing, mem > 75%")
                self.removeProducer()
                self._rebalance_mem_last_kick = datetime.now()
                return

        # Memory pressure check
        if psutil_phymem_usage() > 70:
            if (datetime.now() - self._rebalance_mem_last_kick).seconds > self.rebalance_mem_backoff_timer:
                logger.debug("Rebalancing mem, recycling processes")
                self.restartConsumers()
                self.restartProducers()
                gc.collect()
                self._rebalance_mem_last_kick = datetime.now()
                logger.debug("Rebalancing mem, recycling processes - done")
                return
            else:
                logger.debug("Rebalance (Memory pressure check) postponed, waiting for rebalance_mem_backoff_timer")

        # We wait until tasks are moving along to start rebalancing stuff
        if progProducers < (num_tasks / 10):
            return

        # Auto-balancing
        if progProducers > progConsumers * 2:
            if self.num_producers > 1:
                if elapsed_backoff_time > self.rebalance_backoff_timer:
                    logger.debug("Rebalancing, too many producers")
                    self.removeProducer()
                    self._rebalance_last_kick = datetime.now()
                    return
                else:
                    logger.debug("Rebalance postponed, waiting for rebalance_backoff_timer")
        elif progProducers < progConsumers * 1.20:
            if num_tasks > progProducers * 1.20:
                if psutil_phymem_usage() < 70 and elapsed_backoff_time > self.rebalance_backoff_timer:
                    logger.debug("Rebalancing")
                    self.addProducer()
                    self._rebalance_last_kick = datetime.now()
                    return
                else:
                    logger.debug("Rebalance (Auto-balancing) postponed, waiting for rebalance_backoff_timer")

        logger.debug("Balancing done")
Example #3
def appLoadMP(pathToLoad, dbfilenameFullPath, maxCores, governorOffFlag):
    global _tasksPerJob

    files_to_process = []
    conn = None

    # Start timer
    t0 = datetime.now()

    logger.debug("Starting appLoadMP")
    # Calculate aggregate file_filter for all supported ingest types:
    file_filter = '|'.join([v.getFileNameFilter() for k, v in ingest_plugins.iteritems()])
    # Add zip extension
    file_filter += r"|.*\.zip"

    # Check if we're loading Redline data
    if os.path.isdir(pathToLoad) and os.path.basename(pathToLoad).lower() == 'redlineaudits':
        files_to_process = searchRedLineAudits(pathToLoad)
    else:
        # Search for all files to be processed
        if os.path.isdir(pathToLoad):
            files_to_process = searchFolders(pathToLoad, file_filter)
        else:
            files_to_process = processArchives(pathToLoad, file_filter)

    if files_to_process:
        # Init DB if required
        DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
        conn = DB.appConnectDB()

        # Extract hostnames, grab existing host IDs from DB and calculate instance ID for new IDs to be ingested:
        instancesToProcess = []
        instancesToProcess += GetIDForHosts(files_to_process, DB)
        countInstancesToProcess = len(instancesToProcess)
        logger.info("Found %d new instances" % (countInstancesToProcess))

        # Setup producers/consumers initial counts
        num_consumers = 1
        num_producers = 1

        # Setup MPEngine
        mpe = MPEngineProdCons(maxCores, appLoadProd, appLoadCons, governorOffFlag)

        # Reduce _tasksPerJob for small jobs
        if countInstancesToProcess < _tasksPerJob:
            _tasksPerJob = 1

        # Create task list
        task_list = []
        instancesPerJob = _tasksPerJob
        num_tasks = 0
        for chunk in chunks(instancesToProcess, instancesPerJob):
            # todo: We no longer need pathToLoad as tasks include the fullpath now
            task_list.append(Task(pathToLoad, chunk))
            num_tasks += 1

        if num_tasks > 0:
            # Check if we have to drop indexes to speedup insertions
            # todo: Research the ratio of existing hosts to new hosts where this makes sense
            if countInstancesToProcess > 1000 or DB.CountHosts() < 1000:
                DB.appDropIndexesDB()

            # Queue tasks for Producers
            mpe.addTaskList(task_list)

            # Start procs
            mpe.startProducers(num_producers)
            mpe.startConsumers(num_consumers, [dbfilenameFullPath])
            # mpe.addProducer()

            # Control loop
            while mpe.working():
                time.sleep(1.0)
                (num_producers, num_consumers, num_tasks, progress_producers, progress_consumers) = mpe.getProgress()
                elapsed_time = datetime.now() - t0
                mean_loadtime_per_host = (elapsed_time) / max(1, _tasksPerJob * progress_consumers)
                pending_hosts = ((num_tasks * _tasksPerJob) - (_tasksPerJob * progress_consumers))
                etr = (mean_loadtime_per_host * pending_hosts)
                eta = t0 + elapsed_time + etr
                ett = (eta - t0)
                if settings.logger_getDebugMode():
                    status_extra_data = " Prod: %s Cons: %s (%d -> %d -> %d: %d) [RAM: %d%% / Obj: %d / ETH: %s / ETA: %s / ETT: %s]" % \
                        (num_producers, num_consumers, num_tasks, progress_producers, progress_consumers,
                         progress_producers - progress_consumers, psutil_phymem_usage(), len(gc.get_objects()),
                         mean_loadtime_per_host if progress_consumers * _tasksPerJob > 100 else "N/A",
                         str(eta.time()).split(".")[0] if progress_consumers * _tasksPerJob > 100 else "N/A",
                         str(ett).split(".")[0] if progress_consumers * _tasksPerJob > 100 else "N/A")
                else:
                    status_extra_data = ""
                # logger.info("Parsing files%s" % status_extra_data)

                logger.info(update_progress(min(1, float(progress_consumers) / float(num_tasks)), "Parsing files%s" % status_extra_data, True))
                mpe.rebalance()

            del mpe

        # Stop timer
        elapsed_time = datetime.now() - t0
        mean_loadtime_per_host = (elapsed_time) / max(1, countInstancesToProcess)
        logger.info("Load speed: %s seconds / file" % (mean_loadtime_per_host))
        logger.info("Load time: %s" % (str(elapsed_time).split(".")[0]))
    else:
        logger.info("Found no files to process!")
Example #4
def GetIDForHosts(fileFullPathList, DB):
    # Returns: (filePath, instanceID, hostname, hostID, ingest_type)
    hostsTest = {}
    hostsProcess = []
    progress_total = 0
    progress_current = 0

    # Determine plugin_type and hostname
    for file_name_fullpath in fileFullPathList:
        hostName = None
        ingest_type = None
        loop_counter = 0
        while True:
            if loop_counter >= len(ingest_plugins_types_stack):
                # Every plugin has been tried once (>= avoids re-testing the first plugin).
                # We ignore empty files from hosts with no appcompat data
                # todo: Omit suppression on verbose mode
                tmp_file_size = file_size(file_name_fullpath)
                if tmp_file_size > 500:
                    logger.warning("No ingest plugin could process: %s (skipping file) [size: %d]" %
                                   (ntpath.basename(file_name_fullpath), tmp_file_size))
                break
            ingest_type = ingest_plugins_types_stack[0]
            if ingest_plugins[ingest_type].matchFileNameFilter(file_name_fullpath):
                # Check magic:
                try:
                    magic_check = ingest_plugins[ingest_type].checkMagic(file_name_fullpath)
                    if isinstance(magic_check, tuple):
                        # Unexpected return type from checkMagic; treat it as a failed
                        # check so we fall through to the next plugin instead of
                        # hitting a NameError on magic_check_res below
                        logger.error("Report bug")
                        magic_check_res = False
                    else:
                        magic_check_res = magic_check
                    if magic_check_res:
                        # Magic OK, go with this plugin
                        hostName = ingest_plugins[ingest_type].getHostName(file_name_fullpath)
                        break
                except Exception as e:
                    logger.exception("Error processing: %s (%s)" % (file_name_fullpath, str(e)))
            # Emulate a stack with a list to minimize internal looping
            # (failed plugins rotate to the back; a matching plugin stays on top for the next file)
            ingest_plugins_types_stack.remove(ingest_type)
            ingest_plugins_types_stack.append(ingest_type)
            loop_counter += 1
        if hostName is not None:
            hostsTest.setdefault(hostName, []).append((file_name_fullpath, ingest_plugins[ingest_type]))

    progress_total = len(hostsTest)
    # Iterate over hosts. If host exists in DB grab rowID else create and grab rowID.
    conn = DB.appGetConn()
    with closing(conn.cursor()) as c:
        for hostName in hostsTest.keys():
            assert hostName
            logger.debug("Processing host: %s" % hostName)
            # Check if Host exists (parameterized query handles quoting safely)
            c.execute("SELECT count(*) FROM Hosts WHERE HostName = ?", (hostName,))
            data = c.fetchone()[0]
            if data != 0:
                # Host already has at least one instance in the DB
                c.execute("SELECT HostID, Instances FROM Hosts WHERE HostName = ?", (hostName,))
                data = c.fetchone()
                tmpHostID = data[0]
                tmpInstances = eval(data[1])  # Instances column stores the repr() of a list of instance IDs
                for (file_fullpath, ingest_plugin) in hostsTest[hostName]:
                    logger.debug("Grabbing instanceID from file: %s" % file_fullpath)
                    try:
                        instance_ID = CalculateInstanceID(file_fullpath, ingest_plugin)
                    except Exception:
                        logger.error("Error parsing: %s (skipping)" % file_fullpath)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        if str(instance_ID) not in tmpInstances:
                            tmpInstances.append(str(instance_ID))
                            hostsProcess.append((file_fullpath, instance_ID, hostName, tmpHostID, ingest_plugin))
                        else:
                            logger.debug("Duplicate host and instance found: %s" %hostName)
                            continue
                # Save updated Instances list (stored as the repr() of the list)
                c.execute("UPDATE Hosts SET Instances = ?, InstancesCounter = ? WHERE HostName = ?",
                          (repr(tmpInstances), len(tmpInstances), hostName))
            else:
                # Host does not exist. Add instance and grab the host ID.
                tmpInstances = []
                newInstances = []
                for (file_fullpath, ingest_plugin) in hostsTest[hostName]:
                    try:
                        instance_ID = CalculateInstanceID(file_fullpath, ingest_plugin)
                    except Exception:
                        logger.error("Error parsing: %s (skipping)" % file_fullpath)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        if str(instance_ID) not in tmpInstances:
                            tmpInstances.append(str(instance_ID))
                            newInstances.append((file_fullpath, instance_ID, ingest_plugin))

                c.execute("INSERT INTO Hosts VALUES (NULL,%s,%s,%d,%d,%d)" % ('"' + hostName + '"', '"' + str(repr(tmpInstances)) + '"', len(tmpInstances), 0, 0))
                tmpHostID = c.lastrowid
                for (file_fullpath, instance_ID, ingest_plugin) in newInstances:
                    # todo: Do we want/need each row to track from what instance it came?
                    hostsProcess.append((file_fullpath, instance_ID, hostName, tmpHostID, ingest_plugin))
            # Update progress
            progress_current += 1
            if settings.logger_getDebugMode():
                status_extra_data = " [RAM: %d%%]" % psutil_phymem_usage()
            else:
                status_extra_data = ""
            # logger.debug("Pre-process new hosts/instances%s" % status_extra_data)
            logger.info(update_progress(min(1, float(progress_current) / float(progress_total)), "Calculate IDs for new hosts/instances%s" % status_extra_data, True))
        conn.commit()

    # Return hosts to be processed
    return hostsProcess
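
CalculateInstanceID() is called above to derive the identifier compared against each host's stored Instances list, but its body is not shown. The call sites only require a deterministic, comparable value per (file, plugin) pair; a hypothetical content-hash sketch (the real implementation may instead delegate to the ingest plugin):

import hashlib

def CalculateInstanceID(file_fullpath, ingest_plugin):
    # Hypothetical sketch: hash the file contents so re-ingesting identical
    # data produces the same ID and is skipped as a duplicate instance.
    hasher = hashlib.md5()
    with open(file_fullpath, 'rb') as f:
        for block in iter(lambda: f.read(65536), b''):
            hasher.update(block)
    return hasher.hexdigest()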