def newPosition(self, globalBestPosition, rng):
  """See comments in base class."""
  # First, update the velocity. The new velocity is given as:
  #   v = (inertia * v) + (cogRate * r1 * (localBest-pos))
  #                     + (socRate * r2 * (globalBest-pos))
  #
  # where r1 and r2 are random numbers between 0 and 1.0
  lb = float(Configuration.get("nupic.hypersearch.randomLowerBound"))
  ub = float(Configuration.get("nupic.hypersearch.randomUpperBound"))

  self._velocity = (self._velocity * self._inertia
                    + rng.uniform(lb, ub) * self._cogRate
                    * (self._bestPosition - self.getPosition()))
  if globalBestPosition is not None:
    self._velocity += rng.uniform(lb, ub) * self._socRate * (
        globalBestPosition - self.getPosition())

  # Update position based on velocity
  self._position += self._velocity

  # Clip it
  self._position = max(self.min, self._position)
  self._position = min(self.max, self._position)

  # Return it
  return self.getPosition()
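The comment block above is the standard particle-swarm update. Below is a minimal, self-contained sketch of the same step in plain Python; the fixed bounds and the 0.25/0.25/1.0 rates stand in for the nupic.hypersearch.* configuration keys and are assumptions, not values taken from this code.

import random

def psoStep(pos, vel, localBest, globalBest, rng,
            inertia=0.25, cogRate=0.25, socRate=1.0,
            lb=0.0, ub=1.0, posMin=-10.0, posMax=10.0):
  """One PSO step: v = inertia*v + cogRate*r1*(localBest-pos)
                       + socRate*r2*(globalBest-pos), then clip the position."""
  vel = (inertia * vel
         + rng.uniform(lb, ub) * cogRate * (localBest - pos)
         + rng.uniform(lb, ub) * socRate * (globalBest - pos))
  pos = min(posMax, max(posMin, pos + vel))
  return pos, vel

print(psoStep(pos=0.0, vel=1.0, localBest=2.0, globalBest=3.0,
              rng=random.Random(42)))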
def dbValidator():
  """
  Let the user know what NuPIC config file is being used and whether or not
  they have mysql set up correctly for swarming.
  """
  fileused = getFileUsed()

  # Get the values we need from NuPIC's configuration
  host = Configuration.get("nupic.cluster.database.host")
  port = int(Configuration.get("nupic.cluster.database.port"))
  user = Configuration.get("nupic.cluster.database.user")
  passwd = Configuration.get("nupic.cluster.database.passwd")

  print("This script will validate that your MySQL is setup correctly for ")
  print("NuPIC. MySQL is required for NuPIC swarming. The settings are")
  print("defined in a configuration file found in ")
  print("$NUPIC/src/nupic/support/nupic-default.xml Out of the box those ")
  print("settings contain MySQL's default access credentials.")
  print()
  print("The nupic-default.xml can be duplicated to define user specific ")
  print("changes calling the copied file ")
  print("$NUPIC/src/nupic/support/nupic-site.xml Refer to the ")
  print("nupic-default.xml for additional instructions.")
  print()
  print("Defaults: localhost, 3306, root, no password")
  print()
  print("Retrieved the following NuPIC configuration using: ", fileused)
  print(" host : ", host)
  print(" port : ", port)
  print(" user : ", user)
  print(" passwd : ", "*" * len(passwd))

  testDbConnection(host, port, user, passwd)
  print("Connection successful!!")
def __init__(self, clamodel, anomalyParams=None):
  if anomalyParams is None:
    anomalyParams = {}
  self.clamodel = clamodel
  self._version = CLAModelClassifierHelper.__VERSION__

  self._classificationMaxDist = 0.1

  if 'autoDetectWaitRecords' not in anomalyParams or \
      anomalyParams['autoDetectWaitRecords'] is None:
    self._autoDetectWaitRecords = int(
        Configuration.get('nupic.model.temporalAnomaly.wait_records'))
  else:
    self._autoDetectWaitRecords = anomalyParams['autoDetectWaitRecords']

  if 'autoDetectThreshold' not in anomalyParams or \
      anomalyParams['autoDetectThreshold'] is None:
    self._autoDetectThreshold = float(
        Configuration.get(
            'nupic.model.temporalAnomaly.auto_detect_threshold'))
  else:
    self._autoDetectThreshold = anomalyParams['autoDetectThreshold']

  if 'anomalyCacheRecords' not in anomalyParams or \
      anomalyParams['anomalyCacheRecords'] is None:
    self._history_length = int(
        Configuration.get('nupic.model.temporalAnomaly.window_length'))
  else:
    self._history_length = anomalyParams['anomalyCacheRecords']

  if 'anomalyVectorType' not in anomalyParams or \
      anomalyParams['anomalyVectorType'] is None:
    self._vectorType = str(
        Configuration.get('nupic.model.temporalAnomaly.anomaly_vector'))
  else:
    self._vectorType = anomalyParams['anomalyVectorType']

  self._activeColumnCount = \
      self.clamodel._getSPRegion().getSelf().getParameter('numActivePerInhArea')

  # Storage for last run
  self._anomalyVectorLength = None
  self._classificationVector = numpy.array([])
  self._prevPredictedColumns = numpy.array([])
  self._prevTPCells = numpy.array([])

  # Array of CLAClassificationRecord's used to recompute and get history
  self.saved_states = []
  self.saved_categories = []
def _getCommonSteadyDBArgsDict():
  """ Returns a dictionary of arguments for DBUtils.SteadyDB.SteadyDBConnection
  constructor.
  """
  return dict(creator=pymysql,
              host=Configuration.get('nupic.cluster.database.host'),
              port=int(Configuration.get('nupic.cluster.database.port')),
              user=Configuration.get('nupic.cluster.database.user'),
              passwd=Configuration.get('nupic.cluster.database.passwd'),
              charset='utf8',
              use_unicode=True,
              setsession=['SET AUTOCOMMIT = 1'])
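A hedged sketch of how a dictionary like this is typically consumed. The DBUtils import path and a reachable MySQL server are assumptions here; SteadyDB.connect() forwards the keyword arguments it does not recognize to the creator (pymysql in this case).

from DBUtils import SteadyDB  # classic DBUtils layout; newer releases ship dbutils.steady_db

conn = SteadyDB.connect(**_getCommonSteadyDBArgsDict())
try:
  cur = conn.cursor()
  cur.execute("SELECT 1")
  print(cur.fetchall())
finally:
  conn.close()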
def __init__(self, clamodel, anomalyParams=None):
  if anomalyParams is None:
    anomalyParams = {}
  self.clamodel = clamodel
  self._version = CLAModelClassifierHelper.__VERSION__

  self._classificationMaxDist = 0.1

  if 'autoDetectWaitRecords' not in anomalyParams or \
      anomalyParams['autoDetectWaitRecords'] is None:
    self._autoDetectWaitRecords = int(Configuration.get(
        'nupic.model.temporalAnomaly.wait_records'))
  else:
    self._autoDetectWaitRecords = anomalyParams['autoDetectWaitRecords']

  if 'autoDetectThreshold' not in anomalyParams or \
      anomalyParams['autoDetectThreshold'] is None:
    self._autoDetectThreshold = float(Configuration.get(
        'nupic.model.temporalAnomaly.auto_detect_threshold'))
  else:
    self._autoDetectThreshold = anomalyParams['autoDetectThreshold']

  if 'anomalyCacheRecords' not in anomalyParams or \
      anomalyParams['anomalyCacheRecords'] is None:
    self._history_length = int(Configuration.get(
        'nupic.model.temporalAnomaly.window_length'))
  else:
    self._history_length = anomalyParams['anomalyCacheRecords']

  if 'anomalyVectorType' not in anomalyParams or \
      anomalyParams['anomalyVectorType'] is None:
    self._vectorType = str(Configuration.get(
        'nupic.model.temporalAnomaly.anomaly_vector'))
  else:
    self._vectorType = anomalyParams['anomalyVectorType']

  self._activeColumnCount = \
      self.clamodel._getSPRegion().getSelf().getParameter('numActiveColumnsPerInhArea')

  # Storage for last run
  self._anomalyVectorLength = None
  self._classificationVector = numpy.array([])
  self._prevPredictedColumns = numpy.array([])
  self._prevTPCells = numpy.array([])

  # Array of CLAClassificationRecord's used to recompute and get history
  self.saved_states = []
  self.saved_categories = []
def __setstate__(self, state):
  version = 1
  if "_version" in state:
    version = state["_version"]

  # Migrate from version 1 to version 2
  if version == 1:
    self._vectorType = str(Configuration.get(
        'nupic.model.temporalAnomaly.anomaly_vector'))
    self._autoDetectWaitRecords = state['_classificationDelay']
  elif version == 2:
    self._autoDetectWaitRecords = state['_classificationDelay']
  elif version == 3:
    pass
  else:
    raise Exception("Error while deserializing %s: Invalid version %s"
                    % (self.__class__, version))

  if '_autoDetectThreshold' not in state:
    self._autoDetectThreshold = 1.1

  for attr, value in state.iteritems():
    setattr(self, attr, value)

  self._version = CLAModelClassifierHelper.__VERSION__
def __init__(self, steps="1", alpha=0.001, verbosity=0, implementation=None,
             maxCategoryCount=None):

  # Set default implementation
  if implementation is None:
    implementation = Configuration.get("nupic.opf.claClassifier.implementation")

  # Convert the steps designation to a list
  self.classifierImp = implementation
  self.steps = steps
  self.stepsList = eval("[%s]" % (steps))
  self.alpha = alpha
  self.verbosity = verbosity

  # Initialize internal structures
  self._claClassifier = CLAClassifierFactory.create(
      steps=self.stepsList,
      alpha=self.alpha,
      verbosity=self.verbosity,
      implementation=implementation
  )
  self.learningMode = True
  self.inferenceMode = False
  self.maxCategoryCount = maxCategoryCount
  self.recordNum = 0
  self._initEphemerals()

  # Flag to know if the compute() function is ever called. This is to
  # prevent backward compatibilities issues with the customCompute() method
  # being called at the same time as the compute() method. Only compute()
  # should be called via network.run(). This flag will be removed once we
  # get to cleaning up the clamodel.py file.
  self._computeFlag = False
def __init__(self, steps="1", alpha=0.001, clVerbosity=0, implementation=None,
             maxCategoryCount=None):

  # Set default implementation
  if implementation is None:
    implementation = Configuration.get("nupic.opf.sdrClassifier.implementation")
  self.implementation = implementation

  # Convert the steps designation to a list
  self.steps = steps
  self.stepsList = [int(i) for i in steps.split(",")]
  self.alpha = alpha
  self.verbosity = clVerbosity

  # Initialize internal structures
  self._sdrClassifier = None
  self.learningMode = True
  self.inferenceMode = False
  self.maxCategoryCount = maxCategoryCount
  self.recordNum = 0

  # Flag to know if the compute() function is ever called. This is to
  # prevent backward compatibilities issues with the customCompute() method
  # being called at the same time as the compute() method. Only compute()
  # should be called via network.run(). This flag will be removed once we
  # get to cleaning up the clamodel.py file.
  self._computeFlag = False
def __init__(self, steps='1', alpha=0.001, verbosity=0, implementation=None,
             maxCategoryCount=None):

  # Set default implementation
  if implementation is None:
    implementation = Configuration.get(
        'nupic.opf.sdrClassifier.implementation')
  self.implementation = implementation

  # Convert the steps designation to a list
  self.steps = steps
  self.stepsList = [int(i) for i in steps.split(",")]
  self.alpha = alpha
  self.verbosity = verbosity

  # Initialize internal structures
  self._sdrClassifier = None
  self.learningMode = True
  self.inferenceMode = False
  self.maxCategoryCount = maxCategoryCount
  self.recordNum = 0

  # Flag to know if the compute() function is ever called. This is to
  # prevent backward compatibilities issues with the customCompute() method
  # being called at the same time as the compute() method. Only compute()
  # should be called via network.run(). This flag will be removed once we
  # get to cleaning up the clamodel.py file.
  self._computeFlag = False
def __createModelCheckpoint(self):
  """ Create a checkpoint from the current model, and store it in a dir named
  after checkpoint GUID, and finally store the GUID in the Models DB
  """
  if self._model is None or self._modelCheckpointGUID is None:
    return

  # Create an output store, if one doesn't exist already
  if self._predictionLogger is None:
    self._createPredictionLogger()

  predictions = io.StringIO()
  self._predictionLogger.checkpoint(
      checkpointSink=predictions,
      maxRows=int(
          Configuration.get('nupic.model.checkpoint.maxPredictionRows')))

  self._model.save(
      os.path.join(self._experimentDir, str(self._modelCheckpointGUID)))
  self._jobsDAO.modelSetFields(
      self._modelID,
      {'modelCheckpointId': str(self._modelCheckpointGUID)},
      ignoreUnchanged=True)

  self._logger.info("Checkpointed Hypersearch Model: modelID: %r, "
                    "checkpointID: %r",
                    self._modelID, self._modelCheckpointGUID)
  return
def dbValidator():
  """
  Let the user know what NuPIC config file is being used and whether or not
  they have mysql set up correctly for swarming.
  """
  fileused = getFileUsed()

  # Get the values we need from NuPIC's configuration
  host = Configuration.get("nupic.cluster.database.host")
  port = int(Configuration.get("nupic.cluster.database.port"))
  user = Configuration.get("nupic.cluster.database.user")
  # BUG was here:
  # passwd = len(Configuration.get("nupic.cluster.database.passwd")) * '*'
  passwd = Configuration.get("nupic.cluster.database.passwd")

  print "This script will validate that your MySQL is setup correctly for "
  print "NuPIC. MySQL is required for NuPIC swarming. The settings are"
  print "defined in a configuration file found in "
  print "$NUPIC/src/nupic/support/nupic-default.xml Out of the box those "
  print "settings contain MySQL's default access credentials."
  print
  print "The nupic-default.xml can be duplicated to define user specific "
  print "changes calling the copied file "
  print "$NUPIC/src/nupic/support/nupic-site.xml Refer to the "
  print "nupic-default.xml for additional instructions."
  print
  print "Defaults: localhost, 3306, root, no password"
  print
  print "Retrieved the following NuPIC configuration using: ", fileused
  print " host : ", host
  print " port : ", port
  print " user : ", user
  print " passwd : ", len(passwd) * '*'

  if testDbConnection(host, port, user, passwd):
    print "Connection successful!!"
  else:
    print ("Couldn't connect to the database or you don't have the "
           "permissions required to create databases and tables. "
           "Please ensure you have MySQL\n installed, running, "
           "accessible using the NuPIC configuration settings, "
           "and the user specified has permission to create both "
           "databases and tables.")
def __init__(self, min, max, stepSize=None, inertia=None, cogRate=None,
             socRate=None):
  """Construct a variable that permutes over floating point values using
  the Particle Swarm Optimization (PSO) algorithm. See descriptions of
  PSO (i.e. http://en.wikipedia.org/wiki/Particle_swarm_optimization)
  for references to the inertia, cogRate, and socRate parameters.

  Parameters:
  -----------------------------------------------------------------------
  min:        min allowed value of position
  max:        max allowed value of position
  stepSize:   if not None, the position must be at min + N * stepSize,
                where N is an integer
  inertia:    The inertia for the particle.
  cogRate:    This parameter controls how much the particle is affected
                by its distance from its local best position
  socRate:    This parameter controls how much the particle is affected
                by its distance from the global best position
  """
  super(PermuteFloat, self).__init__()
  self.min = min
  self.max = max
  self.stepSize = stepSize

  # The particle's initial position and velocity.
  self._position = (self.max + self.min) / 2.0
  self._velocity = (self.max - self.min) / 5.0

  # The inertia, cognitive, and social components of the particle
  self._inertia = (float(Configuration.get("nupic.hypersearch.inertia"))
                   if inertia is None else inertia)
  self._cogRate = (float(Configuration.get("nupic.hypersearch.cogRate"))
                   if cogRate is None else cogRate)
  self._socRate = (float(Configuration.get("nupic.hypersearch.socRate"))
                   if socRate is None else socRate)

  # The particle's local best position and the best global position.
  self._bestPosition = self.getPosition()
  self._bestResult = None
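A hypothetical usage sketch for this class: the PSO rates are passed explicitly so that only the random-bound keys are read from the NuPIC configuration when newPosition() runs, and the values themselves are made up for illustration.

from random import Random

var = PermuteFloat(min=0.0, max=10.0, stepSize=None,
                   inertia=0.25, cogRate=0.25, socRate=1.0)
rng = Random(42)
print(var.getPosition())           # initial position is the midpoint, 5.0
print(var.newPosition(None, rng))  # no global best yet, so only the cognitive term applies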
def create(*args, **kwargs):
  impl = kwargs.pop("implementation", None)
  if impl is None:
    impl = Configuration.get("nupic.opf.claClassifier.implementation")
  if impl == "py":
    return SequenceClassifier(*args, **kwargs)
  elif impl == "cpp":
    raise ValueError("cpp version not yet implemented")
  else:
    raise ValueError("Invalid classifier implementation (%r). Value must be "
                     '"py" or "cpp".' % impl)
def dask_main(argv):
  logging.setLoggerClass(ExtendedLogger)
  buildID = Configuration.get('nupic.software.buildNumber', 'N/A')
  logPrefix = '<BUILDID=%s, WORKER=HS, WRKID=N/A, JOBID=N/A> ' % buildID
  ExtendedLogger.setLogPrefix(logPrefix)

  try:
    main(argv)
  except:
    logging.exception("HypersearchWorker is exiting with unhandled exception; "
                      "argv=%r", sys.argv)
    raise
def _getCommonSteadyDBArgsDict():
  """ Returns a dictionary of arguments for DBUtils.SteadyDB.SteadyDBConnection
  constructor.
  """
  from nupic.database.client_jobs_dao import ClientJobsDAO

  dbname = ClientJobsDAO._getDBName()
  print(dbname)

  host = Configuration.get('nupic.cluster.database.host')
  port = int(Configuration.get('nupic.cluster.database.port'))
  user = Configuration.get('nupic.cluster.database.user')
  password = Configuration.get('nupic.cluster.database.passwd')
  os.environ['PGPASSWORD'] = password

  command = "SELECT 1 FROM pg_database WHERE datname= '%s'" % (dbname)
  args = ['psql', '-At', '-U', user, '-d', 'postgres', '-h', host,
          '-p', str(port), '-c', command]
  proc = Popen(args, stdout=PIPE, stderr=PIPE)
  out, err = proc.communicate()
  if out == b'1\n':
    print("database exists")
  else:
    print("create database")
    command = 'CREATE DATABASE %s' % (dbname)
    call(['psql', '-U', user, '-d', 'postgres', '-h', host,
          '-p', str(port), '-c', command])

  return dict(
      creator=psycopg2,
      ping=7,
      maxusage=1,
      dbname=dbname,
      host=host,
      port=port,
      user=user,
      password=password,
      connect_timeout=0,
      client_encoding='utf-8',
      keepalives_idle=2,
      keepalives_interval=2
      #setsession = ['SET AUTOCOMMIT TO ON']
      #use_unicode = True,
  )
def create(*args, **kwargs):
  impl = kwargs.pop('implementation', None)
  if impl is None:
    impl = Configuration.get('nupic.opf.claClassifier.implementation')
  if impl == 'py':
    return CLAClassifier(*args, **kwargs)
  elif impl == 'cpp':
    return FastCLAClassifier(*args, **kwargs)
  elif impl == 'diff':
    return CLAClassifierDiff(*args, **kwargs)
  else:
    raise ValueError('Invalid classifier implementation (%r). Value must be '
                     '"py" or "cpp".' % impl)
def createAndStartSwarm(client, clientInfo="", clientKey="", params="",
                        minimumWorkers=None, maximumWorkers=None,
                        alreadyRunning=False):
  """Create and start a swarm job.

  Args:
    client - A string identifying the calling client. There is a small limit
        for the length of the value. See ClientJobsDAO.CLIENT_MAX_LEN.
    clientInfo - JSON encoded dict of client specific information.
    clientKey - Foreign key. Limited in length, see ClientJobsDAO._initTables.
    params - JSON encoded dict of the parameters for the job. This can be
        fetched out of the database by the worker processes based on the jobID.
    minimumWorkers - The minimum workers to allocate to the swarm. Set to None
        to use the default.
    maximumWorkers - The maximum workers to allocate to the swarm. Set to None
        to use the swarm default. Set to 0 to use the maximum scheduler value.
    alreadyRunning - Insert a job record for an already running process. Used
        for testing.
  """
  if minimumWorkers is None:
    minimumWorkers = Configuration.getInt(
        "nupic.hypersearch.minWorkersPerSwarm")
  if maximumWorkers is None:
    maximumWorkers = Configuration.getInt(
        "nupic.hypersearch.maxWorkersPerSwarm")

  return ClientJobsDAO.get().jobInsert(
      client=client,
      cmdLine="$HYPERSEARCH",
      clientInfo=clientInfo,
      clientKey=clientKey,
      alreadyRunning=alreadyRunning,
      params=params,
      minimumWorkers=minimumWorkers,
      maximumWorkers=maximumWorkers,
      jobType=ClientJobsDAO.JOB_TYPE_HS)
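A hedged usage sketch: the params payload below is illustrative only (real swarm parameters must satisfy the jobParamsSchema.json that the Hypersearch worker validates against), and a configured jobs database is assumed.

import json

swarmParams = json.dumps({
    "persistentJobGUID": "example-guid-0001",  # hypothetical value
    "hsVersion": "v2",
})
jobID = createAndStartSwarm(client="exampleClient",
                            clientInfo=json.dumps({"source": "docs"}),
                            params=swarmParams,
                            minimumWorkers=1,
                            maximumWorkers=4)
print("Inserted swarm job", jobID)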
def getFileUsed():
  """
  Determine which NuPIC configuration file is being used and returns the
  name of the configuration file it is using. Either DEFAULT_CONFIG or
  USER_CONFIG.
  """
  # output will be {} if the file passed into Configuration._readConfigFile
  # can not be found in the standard paths returned by
  # Configuration._getConfigPaths.
  output = Configuration._readConfigFile(USER_CONFIG)  #pylint: disable=protected-access
  if output != {}:
    return USER_CONFIG
  return DEFAULT_CONFIG
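A small sketch of the resolution order this helper relies on: a readable nupic-site.xml on the configuration path wins, otherwise the shipped nupic-default.xml is reported (assuming USER_CONFIG and DEFAULT_CONFIG are bound to those file names in the calling module).

print("Active NuPIC config file:", getFileUsed())
print("DB host:", Configuration.get("nupic.cluster.database.host"))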
def create(*args, **kwargs):
  """
  Create a SDR classifier factory.
  The implementation of the SDR Classifier can be specified with
  the "implementation" keyword argument.

  The SDRClassifierFactory uses the implementation as specified in
  src/nupic/support/nupic-default.xml
  """
  impl = kwargs.pop("implementation", None)
  if impl is None:
    impl = Configuration.get("nupic.opf.sdrClassifier.implementation")
  if impl == "py":
    return SDRClassifier(*args, **kwargs)
  else:
    raise ValueError("Invalid classifier implementation (%r). Value must be "
                     '"py".' % impl)
def create(*args, **kwargs):
  """
  Create a SDR classifier factory.
  The implementation of the SDR Classifier can be specified with
  the "implementation" keyword argument.

  The SDRClassifierFactory uses the implementation as specified in
  `Default NuPIC Configuration <default-config.html>`_.
  """
  impl = kwargs.pop('implementation', None)
  if impl is None:
    impl = Configuration.get('nupic.opf.sdrClassifier.implementation')
  if impl == 'py':
    return SDRClassifier(*args, **kwargs)
  elif impl == 'cpp':
    return FastSDRClassifier(*args, **kwargs)
  else:
    raise ValueError('Invalid classifier implementation (%r). Value must be '
                     '"py" or "cpp".' % impl)
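A hedged usage sketch of the factory; the SDR indices and bucket values below are made up, and the compute() call follows the signature exposed by NuPIC's SDRClassifier.

classifier = SDRClassifierFactory.create(implementation="py")
result = classifier.compute(recordNum=0,
                            patternNZ=[1, 5, 9],
                            classification={"bucketIdx": 4, "actValue": 34.7},
                            learn=True,
                            infer=True)
print(result["actualValues"])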
def __init__(self, steps='1', alpha=0.001, clVerbosity=0, implementation=None,
             maxCategoryCount=None):

  # Set default implementation
  if implementation is None:
    implementation = Configuration.get('nupic.opf.claClassifier.implementation')

  # Convert the steps designation to a list
  self.classifierImp = implementation
  self.steps = steps
  self.stepsList = eval("[%s]" % (steps))
  self.alpha = alpha
  self.verbosity = clVerbosity

  # Initialize internal structures
  self._claClassifier = CLAClassifierFactory.create(
      steps=self.stepsList,
      alpha=self.alpha,
      verbosity=self.verbosity,
      implementation=implementation,
  )
  self.learningMode = True
  self.inferenceMode = False
  self.maxCategoryCount = maxCategoryCount
  self.recordNum = 0
  self._initEphemerals()

  # Flag to know if the compute() function is ever called. This is to
  # prevent backward compatibilities issues with the customCompute() method
  # being called at the same time as the compute() method. Only compute()
  # should be called via network.run(). This flag will be removed once we
  # get to cleaning up the clamodel.py file.
  self._computeFlag = False
def run(self): """ Run this worker. Parameters: ---------------------------------------------------------------------- retval: jobID of the job we ran. This is used by unit test code when calling this working using the --params command line option (which tells this worker to insert the job itself). """ # Easier access to options options = self._options # --------------------------------------------------------------------- # Connect to the jobs database self.logger.info("Connecting to the jobs database") cjDAO = ClientJobsDAO.get() # Get our worker ID self._workerID = cjDAO.getConnectionID() if options.clearModels: cjDAO.modelsClearAll() # ------------------------------------------------------------------------- # if params were specified on the command line, insert a new job using # them. if options.params is not None: options.jobID = cjDAO.jobInsert( client="hwTest", cmdLine="echo 'test mode'", params=options.params, alreadyRunning=True, minimumWorkers=1, maximumWorkers=1, jobType=cjDAO.JOB_TYPE_HS, ) if options.workerID is not None: wID = options.workerID else: wID = self._workerID buildID = Configuration.get("nupic.software.buildNumber", "N/A") logPrefix = "<BUILDID=%s, WORKER=HW, WRKID=%s, JOBID=%s> " % (buildID, wID, options.jobID) ExtendedLogger.setLogPrefix(logPrefix) # --------------------------------------------------------------------- # Get the search parameters # If asked to reset the job status, do that now if options.resetJobStatus: cjDAO.jobSetFields( options.jobID, fields={ "workerCompletionReason": ClientJobsDAO.CMPL_REASON_SUCCESS, "cancel": False, #'engWorkerState': None }, useConnectionID=False, ignoreUnchanged=True, ) jobInfo = cjDAO.jobInfo(options.jobID) self.logger.info("Job info retrieved: %s" % (str(clippedObj(jobInfo)))) # --------------------------------------------------------------------- # Instantiate the Hypersearch object, which will handle the logic of # which models to create when we need more to evaluate. jobParams = json.loads(jobInfo.params) # Validate job params jsonSchemaPath = os.path.join(os.path.dirname(__file__), "jsonschema", "jobParamsSchema.json") validate(jobParams, schemaPath=jsonSchemaPath) hsVersion = jobParams.get("hsVersion", None) if hsVersion == "v2": self._hs = HypersearchV2( searchParams=jobParams, workerID=self._workerID, cjDAO=cjDAO, jobID=options.jobID, logLevel=options.logLevel, ) else: raise RuntimeError("Invalid Hypersearch implementation (%s) specified" % (hsVersion)) # ===================================================================== # The main loop. try: exit = False numModelsTotal = 0 print >>sys.stderr, "reporter:status:Evaluating first model..." while not exit: # ------------------------------------------------------------------ # Choose a model to evaluate batchSize = 10 # How many to try at a time. modelIDToRun = None while modelIDToRun is None: if options.modelID is None: # ----------------------------------------------------------------- # Get the latest results on all running models and send them to # the Hypersearch implementation # This calls cjDAO.modelsGetUpdateCounters(), compares the # updateCounters with what we have cached, fetches the results for the # changed and new models, and sends those to the Hypersearch # implementation's self._hs.recordModelProgress() method. 
self._processUpdatedModels(cjDAO) # -------------------------------------------------------------------- # Create a new batch of models (exit, newModels) = self._hs.createModels(numModels=batchSize) if exit: break # No more models left to create, just loop. The _hs is waiting for # all remaining running models to complete, and may pick up on an # orphan if it detects one. if len(newModels) == 0: continue # Try and insert one that we will run for (modelParams, modelParamsHash, particleHash) in newModels: jsonModelParams = json.dumps(modelParams) (modelID, ours) = cjDAO.modelInsertAndStart( options.jobID, jsonModelParams, modelParamsHash, particleHash ) # Some other worker is already running it, tell the Hypersearch object # so that it doesn't try and insert it again if not ours: mParamsAndHash = cjDAO.modelsGetParams([modelID])[0] mResult = cjDAO.modelsGetResultAndStatus([modelID])[0] results = mResult.results if results is not None: results = json.loads(results) modelParams = json.loads(mParamsAndHash.params) particleHash = cjDAO.modelsGetFields(modelID, ["engParticleHash"])[0] particleInst = "%s.%s" % ( modelParams["particleState"]["id"], modelParams["particleState"]["genIdx"], ) self.logger.info( "Adding model %d to our internal DB " "because modelInsertAndStart() failed to insert it: " "paramsHash=%s, particleHash=%s, particleId='%s'", modelID, mParamsAndHash.engParamsHash.encode("hex"), particleHash.encode("hex"), particleInst, ) self._hs.recordModelProgress( modelID=modelID, modelParams=modelParams, modelParamsHash=mParamsAndHash.engParamsHash, results=results, completed=(mResult.status == cjDAO.STATUS_COMPLETED), completionReason=mResult.completionReason, matured=mResult.engMatured, numRecords=mResult.numRecords, ) else: modelIDToRun = modelID break else: # A specific modelID was passed on the command line modelIDToRun = int(options.modelID) mParamsAndHash = cjDAO.modelsGetParams([modelIDToRun])[0] modelParams = json.loads(mParamsAndHash.params) modelParamsHash = mParamsAndHash.engParamsHash # Make us the worker cjDAO.modelSetFields(modelIDToRun, dict(engWorkerConnId=self._workerID)) if False: # Change the hash and params of the old entry so that we can # create a new model with the same params for attempt in range(1000): paramsHash = hashlib.md5("OrphanParams.%d.%d" % (modelIDToRun, attempt)).digest() particleHash = hashlib.md5("OrphanParticle.%d.%d" % (modelIDToRun, attempt)).digest() try: cjDAO.modelSetFields( modelIDToRun, dict(engParamsHash=paramsHash, engParticleHash=particleHash) ) success = True except: success = False if success: break if not success: raise RuntimeError( "Unexpected failure to change paramsHash and " "particleHash of orphaned model" ) (modelIDToRun, ours) = cjDAO.modelInsertAndStart( options.jobID, mParamsAndHash.params, modelParamsHash ) # ^^^ end while modelIDToRun ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # --------------------------------------------------------------- # We have a model, evaluate it now # All done? 
if exit: break # Run the model now self.logger.info( "RUNNING MODEL GID=%d, paramsHash=%s, params=%s", modelIDToRun, modelParamsHash.encode("hex"), modelParams, ) # --------------------------------------------------------------------- # Construct model checkpoint GUID for this model: # jobParams['persistentJobGUID'] contains the client's (e.g., API Server) # persistent, globally-unique model identifier, which is what we need; persistentJobGUID = jobParams["persistentJobGUID"] assert persistentJobGUID, "persistentJobGUID: %r" % (persistentJobGUID,) modelCheckpointGUID = jobInfo.client + "_" + persistentJobGUID + ("_" + str(modelIDToRun)) self._hs.runModel( modelID=modelIDToRun, jobID=options.jobID, modelParams=modelParams, modelParamsHash=modelParamsHash, jobsDAO=cjDAO, modelCheckpointGUID=modelCheckpointGUID, ) # TODO: don't increment for orphaned models numModelsTotal += 1 self.logger.info("COMPLETED MODEL GID=%d; EVALUATED %d MODELs", modelIDToRun, numModelsTotal) print >>sys.stderr, "reporter:status:Evaluated %d models..." % (numModelsTotal) print >>sys.stderr, "reporter:counter:HypersearchWorker,numModels,1" if options.modelID is not None: exit = True # ^^^ end while not exit finally: # Provide Hypersearch instance an opportunity to clean up temporary files self._hs.close() self.logger.info("FINISHED. Evaluated %d models." % (numModelsTotal)) print >>sys.stderr, "reporter:status:Finished, evaluated %d models" % (numModelsTotal) return options.jobID
jobID = None
completionReason = ClientJobsDAO.CMPL_REASON_SUCCESS
completionMsg = "Success"

try:
  jobID = hst.run()
except Exception as e:
  jobID = hst._options.jobID
  completionReason = ClientJobsDAO.CMPL_REASON_ERROR
  completionMsg = "ERROR: %s" % (e,)
  raise
finally:
  if jobID is not None:
    cjDAO = ClientJobsDAO.get()
    cjDAO.jobSetCompleted(jobID=jobID,
                          completionReason=completionReason,
                          completionMsg=completionMsg)

return jobID


if __name__ == "__main__":
  logging.setLoggerClass(ExtendedLogger)
  buildID = Configuration.get("nupic.software.buildNumber", "N/A")
  logPrefix = "<BUILDID=%s, WORKER=HS, WRKID=N/A, JOBID=N/A> " % buildID
  ExtendedLogger.setLogPrefix(logPrefix)

  try:
    main(sys.argv)
  except:
    logging.exception("HypersearchWorker is exiting with unhandled exception; "
                      "argv=%r", sys.argv)
    raise
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
import pymysql

from nupic.support.configuration import Configuration

# Get the values we need from NuPIC's configuration
host = Configuration.get('nupic.cluster.database.host')
port = int(Configuration.get('nupic.cluster.database.port'))
user = Configuration.get('nupic.cluster.database.user')
passwd = Configuration.get('nupic.cluster.database.passwd')

print
print "This script will validate that your MySQL is setup correctly for NuPIC."
print "MySQL is required for NuPIC swarming. The settings are defined in "
print "NuPIC's configuration files in $NUPIC/conf/default/nupic-default.xml "
print "Out of the box those settings contain MySQL's default access "
print "credentials."
print
print "Retrieved the following settings from NuPIC configuration:"
print " host : ", host
print " port : ", port
print " user : ", user
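A hedged sketch of the connectivity check this script leads up to; it reuses the host/port/user/passwd values read above and is written so it runs under both Python 2.7 and 3.

try:
  conn = pymysql.connect(host=host, port=port, user=user, passwd=passwd)
  conn.close()
  print("Connection successful!!")
except Exception as e:
  print("Couldn't connect to MySQL with the settings above: %s" % (e,))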
def __init__(self, modelID, jobID, predictedField, experimentDir, reportKeyPatterns, optimizeKeyPattern, jobsDAO, modelCheckpointGUID, logLevel=None, predictionCacheMaxRecords=None): """ Parameters: ------------------------------------------------------------------------- modelID: ID for this model in the models table jobID: ID for this hypersearch job in the jobs table predictedField: Name of the input field for which this model is being optimized experimentDir: Directory path containing the experiment's description.py script reportKeyPatterns: list of items from the results dict to include in the report. These can be regular expressions. optimizeKeyPattern: Which report item, if any, we will be optimizing for. This can also be a regular expression, but is an error if it matches more than one key from the experiment's results. jobsDAO: Jobs data access object - the interface to the jobs database which has the model's table. modelCheckpointGUID: A persistent, globally-unique identifier for constructing the model checkpoint key. If None, then don't bother creating a model checkpoint. logLevel: override logging level to this value, if not None predictionCacheMaxRecords: Maximum number of records for the prediction output cache. Pass None for default value. """ # ----------------------------------------------------------------------- # Initialize class constants # ----------------------------------------------------------------------- self._MIN_RECORDS_TO_BE_BEST = int( Configuration.get('nupic.hypersearch.bestModelMinRecords')) self._MATURITY_MAX_CHANGE = float( Configuration.get('nupic.hypersearch.maturityPctChange')) self._MATURITY_NUM_POINTS = int( Configuration.get('nupic.hypersearch.maturityNumPoints')) # ----------------------------------------------------------------------- # Initialize instance variables # ----------------------------------------------------------------------- self._modelID = modelID self._jobID = jobID self._predictedField = predictedField self._experimentDir = experimentDir self._reportKeyPatterns = reportKeyPatterns self._optimizeKeyPattern = optimizeKeyPattern self._jobsDAO = jobsDAO self._modelCheckpointGUID = modelCheckpointGUID self._predictionCacheMaxRecords = predictionCacheMaxRecords self._isMaturityEnabled = bool( int(Configuration.get('nupic.hypersearch.enableModelMaturity'))) self._logger = logging.getLogger(".".join([ 'com.numenta', self.__class__.__module__, self.__class__.__name__ ])) self._optimizedMetricLabel = None self._reportMetricLabels = [] # Our default completion reason self._cmpReason = ClientJobsDAO.CMPL_REASON_EOF if logLevel is not None: self._logger.setLevel(logLevel) # The manager object to compute the metrics for this model self.__metricMgr = None # Will be set to a new instance of OPFTaskDriver by __runTask() #self.__taskDriver = None # Current task control parameters. Will be set by __runTask() self.__task = None # Will be set to a new instance of PeriodicActivityManager by __runTask() self._periodic = None # Will be set to streamDef string by _runTask() self._streamDef = None # Will be set to new OpfExperiment instance by run() self._model = None # Will be set to new InputSource by __runTask() self._inputSource = None # 0-based index of the record being processed; # Initialized and updated by __runTask() self._currentRecordIndex = None # Interface to write predictions to a persistent storage self._predictionLogger = None # In-memory cache for predictions. 
Predictions are written here for speed # when they don't need to be written to a persistent store self.__predictionCache = deque() # Flag to see if this is the best model in the job (as determined by the # model chooser logic). This is essentially a cache of the value in the # ClientJobsDB self._isBestModel = False # Flag to see if there is a best model (not necessarily this one) # stored in the DB self._isBestModelStored = False # ----------------------------------------------------------------------- # Flags for model cancelation/checkpointing # ----------------------------------------------------------------------- # Flag to see if the job that this model is part of self._isCanceled = False # Flag to see if model was killed, either by the model terminator or by the # hypsersearch implementation (ex. the a swarm is killed/matured) self._isKilled = False # Flag to see if the model is matured. In most cases, this means that we # should stop running the model. The only execption is if this model is the # best model for the job, in which case it should continue running. self._isMature = False # Event to see if interrupt signal has been sent self._isInterrupted = threading.Event() # ----------------------------------------------------------------------- # Facilities for measuring model maturity # ----------------------------------------------------------------------- # List of tuples, (iteration, metric), used to see if the model has 'matured' self._metricRegression = regression.AveragePctChange( windowSize=self._MATURITY_NUM_POINTS) self.__loggedMetricPatterns = []