def run(dataName=None, allocModelName=None, obsModelName=None, algName=None,
        doSaveToDisk=True, doWriteStdOut=True, taskID=None, **kwargs):
    ''' Fit specified model to data with learning algorithm.

    Runs nTask separate tasks (one call to _run_task_internal each) and
    keeps the task whose Info['evBound'] is largest.

    Usage
    -------
    To fit a Gauss MixModel to a custom dataset defined in matrix X
    >> Data = bnpy.data.XData(X)
    >> hmodel, LP, Info = run(Data, 'MixModel', 'Gauss', 'EM', K=3, nLap=10)

    To load a dataset specified in a specific script
    For example, 2D toy data in demodata/AsteriskK8.py
    >> hmodel, LP, Info = run('AsteriskK8', 'MixModel', 'Gauss', 'VB', K=3)

    To run 5 tasks (separate initializations) and get best of 5 runs:
    >> opts = dict(K=8, nLap=100, printEvery=0)
    >> hmodel, LP, Info = run('AsteriskK8', 'MixModel', 'Gauss', 'VB',
    ..                        nTask=5, **opts)

    Args
    -------
    dataName : either one of
        * bnpy Data object,
        * string filesystem path of Data module within BNPYDATADIR
    allocModelName : string name of allocation (latent structure) model
        {MixModel, DPMixModel, AdmixModel, HMM, etc.}
    obsModelName : string name of observation (likelihood) model
        {Gauss, ZMGauss, WordCount, etc.}
    algName : string name of learning algorithm
        ('EM' and 'VB' appear in the usage examples above)
    doSaveToDisk : forwarded unchanged to every task
        (presumably toggles saving results to disk -- TODO confirm)
    doWriteStdOut : forwarded unchanged to every task
        (presumably toggles printing progress to stdout -- TODO confirm)
    taskID : optional integer id of the first task.
        When None, the 'taskid' entry of the parsed OutputPrefs is used.
        Otherwise it overrides that entry.
    **kwargs : keyword args defining properties of the model or alg
        see Doc for details [TODO]

    If any of the four *Name arguments is None, all four are obtained
    from BNPYArgParser.parseRequiredArgs() instead.

    Returns
    -------
    hmodel : best model fit to the dataset (across nTask runs)
    LP : local parameters of that best model on the dataset
    Info : dict of information returned by the best task.
        Info['evBound'] is the log evidence (ELBO) of that task,
        scalar, real value where larger value implies better model

    Raises
    -------
    ValueError : if nTask < 1, so that no task was executed.
    '''
    hasReqArgs = all(
        arg is not None
        for arg in (dataName, allocModelName, obsModelName, algName))

    if hasReqArgs:
        ReqArgs = dict(dataName=dataName, allocModelName=allocModelName,
                       obsModelName=obsModelName, algName=algName)
    else:
        ReqArgs = BNPYArgParser.parseRequiredArgs()
        dataName = ReqArgs['dataName']
        allocModelName = ReqArgs['allocModelName']
        obsModelName = ReqArgs['obsModelName']
        algName = ReqArgs['algName']
    KwArgs, UnkArgs = BNPYArgParser.parseKeywordArgs(ReqArgs, **kwargs)

    jobname = KwArgs['OutputPrefs']['jobname']

    if taskID is None:
        starttaskid = KwArgs['OutputPrefs']['taskid']
    else:
        # An explicit taskID wins over whatever the parser produced.
        starttaskid = taskID
        KwArgs['OutputPrefs']['taskid'] = taskID
    nTask = KwArgs['OutputPrefs']['nTask']

    bestModel = None
    bestLP = None
    bestInfo = None
    bestEvBound = -np.inf
    for taskid in range(starttaskid, starttaskid + nTask):
        hmodel, LP, Info = _run_task_internal(
            jobname, taskid, nTask,
            ReqArgs, KwArgs, UnkArgs,
            dataName, allocModelName, obsModelName, algName,
            doSaveToDisk, doWriteStdOut)
        # Always accept the first completed task. Otherwise a run whose
        # evBound is -inf or NaN never beats the -inf starting value and
        # the results would be left undefined at the return statement.
        if bestInfo is None or Info['evBound'] > bestEvBound:
            bestModel = hmodel
            bestLP = LP
            bestEvBound = Info['evBound']
            bestInfo = Info

    if bestInfo is None:
        # The loop body never ran, so there is nothing to return.
        raise ValueError(
            'nTask must be at least 1 to fit a model; got %r' % (nTask,))
    return bestModel, bestLP, bestInfo
def run(dataName=None, allocModelName=None, obsModelName=None, algName=None,
        doSaveToDisk=True, doWriteStdOut=True, taskID=None, **kwargs):
    """ Fit specified model to data with learning algorithm.

    Runs nTask separate tasks (one call to _run_task_internal each) and
    keeps the task whose info_dict['loss'] is smallest.

    Args
    -------
    dataName : either one of
        * bnpy Data object,
        * string name of python file within BNPYDATADIR
    allocModelName : string name of allocation (latent structure) model
    obsModelName : string name of observation (likelihood) model
    algName : string name of learning algorithm
    doSaveToDisk : stored in OutputPrefs and forwarded to every task
        (presumably toggles saving results to disk -- TODO confirm)
    doWriteStdOut : stored in OutputPrefs and forwarded to every task
        (presumably toggles printing progress to stdout -- TODO confirm)
    taskID : optional integer id of the first task.
        When None, the 'taskid' entry of the parsed OutputPrefs is used.
        Otherwise it overrides that entry.
    **kwargs : keyword args defining properties of the model or alg

    If any of the four *Name arguments is None, all four are obtained
    from BNPYArgParser.parseRequiredArgs() instead.

    Returns
    -------
    hmodel : best model fit to the dataset (across nTask runs)
    Info : dict of information about this best model

    Raises
    -------
    ValueError : if nTask < 1, so that no task was executed.
    """
    hasReqArgs = all(
        arg is not None
        for arg in (dataName, allocModelName, obsModelName, algName))

    if hasReqArgs:
        ReqArgs = dict(dataName=dataName, allocModelName=allocModelName,
                       obsModelName=obsModelName, algName=algName)
    else:
        ReqArgs = BNPYArgParser.parseRequiredArgs()
        dataName = ReqArgs['dataName']
        allocModelName = ReqArgs['allocModelName']
        obsModelName = ReqArgs['obsModelName']
        algName = ReqArgs['algName']
    KwArgs, UnkArgs = BNPYArgParser.parseKeywordArgs(ReqArgs, **kwargs)
    KwArgs['OutputPrefs']['doSaveToDisk'] = doSaveToDisk
    KwArgs['OutputPrefs']['doWriteStdOut'] = doWriteStdOut

    jobname = KwArgs['OutputPrefs']['jobname']
    # Update stored numerical options via keyword args
    bnpy.util.NumericUtil.UpdateConfig(**UnkArgs)

    if taskID is None:
        starttaskid = KwArgs['OutputPrefs']['taskid']
    else:
        # An explicit taskID wins over whatever the parser produced.
        starttaskid = taskID
        KwArgs['OutputPrefs']['taskid'] = taskID
    nTask = KwArgs['OutputPrefs']['nTask']

    bestModel = None
    best_info_dict = None
    best_loss = np.inf
    for taskid in range(starttaskid, starttaskid + nTask):
        hmodel, info_dict = _run_task_internal(
            jobname, taskid, nTask,
            ReqArgs, KwArgs, UnkArgs,
            dataName, allocModelName, obsModelName, algName,
            doSaveToDisk, doWriteStdOut)
        # The first task is always accepted, so a loss of +inf or NaN
        # (which never compares below the +inf starting value) still
        # yields a result.
        if best_info_dict is None or info_dict['loss'] < best_loss:
            bestModel = hmodel
            best_loss = info_dict['loss']
            best_info_dict = info_dict

    if best_info_dict is None:
        # The loop body never ran, so there is nothing to return.
        raise ValueError(
            'nTask must be at least 1 to fit a model; got %r' % (nTask,))
    return bestModel, best_info_dict