Exemplo n.º 1
0
def get_minibatch_iterator(seed=8675309,
                           dataorderseed=0,
                           nBatch=10,
                           nObsBatch=None,
                           nObsTotal=25000,
                           nLap=1,
                           startLap=0,
                           **kwargs):
    ''' Generate a dataset and wrap it in a MinibatchIterator.

    Args
    --------
    seed : integer seed for random number generator,
        forwarded to get_X to generate the data
    dataorderseed : integer seed forwarded to MinibatchIterator;
        per the original notes it determines
        (a) how data is divided into minibatches
        (b) the order these minibatches are traversed
    nBatch, nObsBatch, nLap, startLap : forwarded to MinibatchIterator
    nObsTotal : number of observations requested from get_X
    **kwargs : accepted but ignored

    Returns
    -------
    bnpy MinibatchIterator object built over the generated XData
    '''
    # Only the observations are used here; the labels from get_X are dropped.
    observations, _ = get_X(seed, nObsTotal)
    dataset = XData(X=observations)
    dataset.summary = get_data_info()
    iteratorOptions = dict(
        nBatch=nBatch,
        nObsBatch=nObsBatch,
        nLap=nLap,
        startLap=startLap,
        dataorderseed=dataorderseed,
    )
    return MinibatchIterator(dataset, **iteratorOptions)
Exemplo n.º 2
0
 def get_size_of_batch_from_file(self, filepath):
     ''' Report the size of the batch stored in a single file.

     Args
     -------
     filepath : string path of the batch file

     Returns
     -------
     For a path ending in '.ldac', the number of lines in the file.
     Otherwise the nDoc attribute (dataset_type 'GroupXData') or the
     nObs attribute (dataset_type 'XData') of the loaded dataset.

     Raises
     -------
     ValueError : if the path does not end in '.ldac' and
         self.dataset_type is neither 'GroupXData' nor 'XData'
     '''
     if filepath.endswith('.ldac'):
         # Stream the file: counting lines does not require holding
         # every line in memory at once.
         with open(filepath, 'r') as f:
             return sum(1 for _ in f)
     elif self.dataset_type == 'GroupXData':
         # NOTE(review): this branch loads through XData.read_file even
         # though the dataset type is GroupXData -- confirm intended.
         return XData.read_file(filepath).nDoc
     elif self.dataset_type == 'XData':
         return XData.read_file(filepath).nObs
     else:
         raise ValueError('Unrecognized file type: ' + filepath)
     """
def get_data(**kwargs):
    ''' Load a dataset from a text file of floating point values.

      Keyword Args
      -------
      filepath : optional path of the text file to read with np.loadtxt.
          When it is not supplied, the name `filepath` from the enclosing
          scope is used (presumably a module-level default -- confirm).

      Returns
      -------
        Data : bnpy XData object wrapping the loaded array,
            with its name and summary attributes filled in
    '''
    # Honor the documented 'filepath' keyword; a distinct local name is
    # used so the enclosing-scope default remains reachable.
    path = kwargs.get('filepath')
    if path is None:
        path = filepath
    X = np.loadtxt(path, dtype=np.float64)
    Data = XData(X=X)
    Data.name = get_short_name()
    Data.summary = get_data_info()
    return Data
Exemplo n.º 4
0
 def MakeData(self, K=3, Nperclass=1000):
     ''' Build a simple toy dataset for testing and store it on self.

     Each of the K components contributes Nperclass draws from mvnrand
     with identity covariance and well-separated means
       mu0 = [-10, -10]
       mu1 = [0, 0]
       mu2 = [10, 10]

     Sets self.TrueResp (one-hot rows), self.Data and self.Mu.
     '''
     rng = np.random.RandomState(8675309)
     # Mean matrix rows: [-10 -10; 0 0; 10 10]
     Mu = np.zeros((3, 2))
     Mu[0] -= 10
     Mu[2] += 10
     # Shared identity covariance
     identity = np.eye(2)
     nTotal = K * Nperclass
     self.TrueResp = np.zeros((nTotal, K))
     samples = []
     for k in range(K):
         samples.append(mvnrand(Mu[k], identity, Nperclass, rng))
         # Rows of component k occupy one contiguous block
         first = k * Nperclass
         self.TrueResp[first:first + Nperclass, k] = 1.0
     self.Data = XData(X=np.vstack(samples))
     self.Mu = Mu
     assert np.abs(self.TrueResp.sum() - self.Data.nObs) < 1e-2
Exemplo n.º 5
0
def get_data(seed=8675309, nObsTotal=25000, **kwargs):
  ''' Generate a labeled dataset and wrap it in an XData object.

    Args
    -------
    seed : integer seed for random number generator,
            forwarded to get_X to generate the data
    nObsTotal : total number of observations, forwarded to get_X
    **kwargs : accepted but ignored

    Returns
    -------
      Data : bnpy XData object built from the generated X and TrueZ,
             with its summary attribute filled in
  '''
  observations, labels = get_X(seed, nObsTotal)
  dataset = XData(X=observations, TrueZ=labels)
  dataset.summary = get_data_info()
  return dataset
Exemplo n.º 6
0
 def setUp(self):
     ''' Create random 100 x 3 data and a 'VB' MixModel/ZMGauss model.

     Sets self.Data and self.hmodel.
     '''
     self.Data = XData(X=np.random.randn(100, 3))
     allocPriorArgs = {'alpha0': 1.0}
     obsPriorArgs = {'dF': 5, 'ECovMat': 'eye', 'sF': 1.0}
     self.hmodel = HModel.CreateEntireModel(
         'VB', 'MixModel', 'ZMGauss',
         allocPriorArgs, obsPriorArgs, self.Data)
Exemplo n.º 7
0
    def MakeData(self, K=3, Nperclass=1000):
        ''' Create simple toy XData with K components, add as attribute to self

        Simple 3 component data with eye covar and distinct, well-sep means
        mu0 = [-10, -10]
        mu1 = [0, 0]
        mu2 = [10, 10]

        Sets on self
        -------
        TrueResp : (K * Nperclass, K) one-hot assignment of each row
        DupResp : (K * Nperclass, 2 * K) assignment where each component's
            rows are split in half between columns k and K + k
        Data : XData holding the stacked draws
        Mu : the (3, 2) matrix of means
        '''
        Mu = np.zeros((3, 2))
        Mu[0] = Mu[0] - 10
        Mu[2] = Mu[2] + 10
        Sigma = np.eye(2)
        self.TrueResp = np.zeros((K * Nperclass, K))
        self.DupResp = np.zeros((K * Nperclass, 2 * K))
        Xlist = list()
        for k in range(K):
            Xcur = mvnrand(Mu[k], Sigma, Nperclass)
            Xlist.append(Xcur)
            start = k * Nperclass
            stop = (k + 1) * Nperclass
            # Slice bounds must be integers: a float midpoint raises
            # TypeError on current NumPy, so use floor division.
            half = (start + stop) // 2
            self.TrueResp[start:stop, k] = 1.0
            self.DupResp[start:half, k] = 1.0
            self.DupResp[half:stop, K + k] = 1.0
        X = np.vstack(Xlist)
        self.Data = XData(X=X)
        self.Mu = Mu
        assert np.abs(self.TrueResp.sum() - self.Data.nObs) < 1e-2
        assert np.abs(self.DupResp.sum() - self.Data.nObs) < 1e-2
Exemplo n.º 8
0
def generateRandomBinaryDataFromMixture(**kwargs):
    ''' Sample binary observations from a mixture with uniform weights.

    Keyword Args
    -------
    Any key of Defaults may be overridden; missing keys are filled in
    from Defaults. 'nObsTotal' and 'seed' are read here, and the full
    set of keyword args is forwarded to makePhi.

    Returns
    -------
    XData built from the (nObsTotal, D) matrix X, with a TrueParams dict
    holding 'beta' (cluster weights), 'phi' (from makePhi) and
    'Z' (cluster label of each row).
    '''
    for key in Defaults:
        if key not in kwargs:
            kwargs[key] = Defaults[key]
    phi = makePhi(**kwargs)
    nObsTotal = kwargs['nObsTotal']

    PRNG = np.random.RandomState(kwargs['seed'])

    # NOTE(review): K and D are not defined in this function; presumably
    # they are module-level constants -- confirm.

    # Select number of observations from each cluster
    beta = 1.0 / K * np.ones(K)
    if nObsTotal < 2 * K:
        # Force examples from every cluster (when nObsTotal >= K) by
        # spreading observations as evenly as possible. The counts must
        # sum to exactly nObsTotal, otherwise the row slices filled below
        # would run past the end of X or leave rows unfilled.
        nPerCluster = (nObsTotal // K) * np.ones(K)
        nPerCluster[:nObsTotal % K] += 1
    else:
        nPerCluster = as1D(PRNG.multinomial(nObsTotal, beta, size=1))
    nPerCluster = np.int32(nPerCluster)

    # Generate data from each cluster!
    X = np.zeros((nObsTotal, D))
    Z = np.zeros(nObsTotal, dtype=np.int32)
    start = 0
    for k in range(K):
        stop = start + nPerCluster[k]
        # Each entry is 1.0 with probability phi[k, d], else 0.0
        X[start:stop] = np.float64(
            PRNG.rand(nPerCluster[k], D) < phi[k, :][np.newaxis, :])
        Z[start:stop] = k
        start = stop

    TrueParams = dict()
    TrueParams['beta'] = beta
    TrueParams['phi'] = phi
    TrueParams['Z'] = Z
    return XData(X, TrueParams=TrueParams)
Exemplo n.º 9
0
def get_data(seed=8675309, nObsTotal=25000, **kwargs):
    ''' Generate a labeled dataset and wrap it in an XData object.

    Args
    -------
    seed : integer seed for random number generator,
        forwarded to get_X to generate the data
    nObsTotal : total number of observations, forwarded to get_X
    **kwargs : accepted but ignored

    Returns
    -------
    Data : bnpy XData object built from the generated X and TrueZ,
        with its summary attribute filled in
    '''
    observations, labels = get_X(seed, nObsTotal)
    dataset = XData(X=observations, TrueZ=labels)
    dataset.summary = get_data_info()
    return dataset
Exemplo n.º 10
0
 def setUp(self):
     ''' Create random 100 x 3 data and an 'EM' MixModel/ZMGauss model.

     Sets self.Data and self.hmodel.
     '''
     self.Data = XData(X=np.random.randn(100, 3))
     allocPriorArgs = {'alpha0': 1.0}
     obsPriorArgs = {'min_covar': 1e-9}
     self.hmodel = HModel.CreateEntireModel(
         'EM', 'MixModel', 'ZMGauss',
         allocPriorArgs, obsPriorArgs, self.Data)
def get_data(seed=8675309, nObsTotal=25000, **kwargs):
    ''' Create and return toy dataset from 1D standard normal distribution.

    Args
    -------
    seed : integer seed for random number generator,
        forwarded to generate_data to generate the data
    nObsTotal : total number of observations, forwarded to generate_data
    **kwargs : accepted but ignored

    Returns
    -------
    Data : bnpy XData object built from the generated X and TrueZ,
        with its name and summary attributes filled in
    '''
    observations, labels = generate_data(seed, nObsTotal)
    dataset = XData(X=observations, TrueZ=labels)
    dataset.name = get_short_name()
    dataset.summary = get_data_info()
    return dataset
Exemplo n.º 12
0
 def MakeData(self, K=5, Nperclass=1000):
   ''' Build K tight 2D clusters and store them as self.Data.

   Cluster k holds Nperclass points: seeded Gaussian noise scaled
   by 1e-3 and shifted by k in both dimensions.
   '''
   rng = np.random.RandomState(867)
   noiseScale = 1e-3
   clusters = [noiseScale * rng.randn(Nperclass, 2) + k
               for k in range(K)]
   self.Data = XData(np.vstack(clusters))
Exemplo n.º 13
0
def get_data(seed=8675309, nObsTotal=None, nPerState=20, **kwargs):
    ''' Generate a labeled toy dataset via genToyData.

      Args
      -------
      seed : integer seed for random number generator,
              forwarded to genToyData to generate the data
      nObsTotal : optional total number of observations. When given,
              it overrides nPerState with nObsTotal // K.
      nPerState : value forwarded to genToyData as nPerState
              when nObsTotal is None
      **kwargs : accepted but ignored

      Returns
      -------
        Data : bnpy XData object built from the generated X and TrueZ,
               with its name and summary attributes filled in
    '''
    perState = nPerState if nObsTotal is None else nObsTotal // K
    observations, labels = genToyData(seed=seed, nPerState=perState)
    dataset = XData(X=observations, TrueZ=labels)
    dataset.name = get_short_name()
    dataset.summary = get_data_info()
    return dataset
Exemplo n.º 14
0
  def setUp(self):
    ''' Create seeded 100 x 2 data and an initialized 'EM' mixture model.

    Sets self.Data and self.hmodel; the model's global parameters are
    initialized with initname 'randexamples', seed 0 and K=5.
    '''
    rng = np.random.RandomState(867)
    self.Data = XData(X=rng.randn(100, 2))

    allocPriorArgs = {'alpha0': 1.0}
    obsPriorArgs = {'min_covar': 1e-9}
    self.hmodel = HModel.CreateEntireModel(
        'EM', 'MixModel', 'ZMGauss',
        allocPriorArgs, obsPriorArgs, self.Data)
    self.hmodel.init_global_params(
        self.Data, initname='randexamples', seed=0, K=5)
Exemplo n.º 15
0
def get_minibatch_iterator(seed=8675309, dataorderseed=0, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, startLap=0, **kwargs):
  ''' Generate a dataset and wrap it in a MinibatchIterator.

    Args
    --------
    seed : integer seed for random number generator,
            forwarded to get_X to generate the data
    dataorderseed : integer seed forwarded to MinibatchIterator;
            per the original notes it determines
             (a) how data is divided into minibatches
             (b) the order these minibatches are traversed
    nBatch, nObsBatch, nLap, startLap : forwarded to MinibatchIterator
    nObsTotal : number of observations requested from get_X
    **kwargs : accepted but ignored

    Returns
    -------
      bnpy MinibatchIterator object built over the generated XData
  '''
  # Only the observations are used here; the labels from get_X are dropped.
  observations, _ = get_X(seed, nObsTotal)
  dataset = XData(X=observations)
  dataset.summary = get_data_info()
  iteratorOptions = dict(
    nBatch=nBatch,
    nObsBatch=nObsBatch,
    nLap=nLap,
    startLap=startLap,
    dataorderseed=dataorderseed,
  )
  return MinibatchIterator(dataset, **iteratorOptions)
Exemplo n.º 16
0
 def MakeData(self, nObsC=200):
     ''' Draw nObsC samples from every component of self.obsM.

     Does nothing when self.obsM is None. Otherwise reseeds numpy's
     global random state with 505, stacks the per-component draws from
     mvnrand, and sets self.nObsC and self.Data.
     '''
     obsModel = self.obsM
     if obsModel is None:
         return
     np.random.seed(505)
     draws = []
     for compID in range(obsModel.K):
         covar = obsModel.get_covar_mat_for_comp(compID)
         mean = obsModel.get_mean_for_comp(compID)
         draws.append(mvnrand(mean, covar, nObsC))
     stacked = np.vstack(draws)
     self.nObsC = nObsC
     self.Data = XData(X=stacked)
Exemplo n.º 17
0
def init_global_params(hmodel,
                       Data,
                       initname='randexamples',
                       seed=0,
                       K=0,
                       **kwargs):
    ''' Initialize the global parameters of hmodel, in place.

    A responsibility matrix resp is built according to initname, then
    turned into parameters via hmodel.get_global_suff_stats followed by
    hmodel.update_global_params.

    Args
    -------
    hmodel : model object to initialize
    Data : dataset object providing nObs, X and dim attributes
    initname : name of the initialization routine, one of
        'randexamples' : choose K items uniformly at random
        'randexamplesbydist' : choose the first item at random, then
            each next item with weight given by squared euclidean
            distance to the closest item chosen so far
        'randsoftpartition' : give every item random mass in each
            of the K components
        'randomnaive' : generate K "fake" examples from the
            diagonalized data covariance, one per component
    seed : integer seed for the random number generator
    K : number of components (columns of resp)
    **kwargs : accepted but ignored

    Raises
    -------
    ValueError : if initname is not one of the names listed above
    '''
    PRNG = np.random.RandomState(seed)
    if initname == 'randexamples':
        # Choose K items uniformly at random from the Data,
        # then component params by M-step given those single items
        resp = np.zeros((Data.nObs, K))
        permIDs = PRNG.permutation(Data.nObs).tolist()
        for k in range(K):
            resp[permIDs[k], k] = 1.0
    elif initname == 'randexamplesbydist':
        # Choose K items from the Data, selecting the first at random,
        # then subsequently proportional to euclidean distance
        # to the closest item
        objID = discrete_single_draw(np.ones(Data.nObs), PRNG)
        chosenObjIDs = list([objID])
        minDistVec = np.inf * np.ones(Data.nObs)
        for k in range(1, K):
            curDistVec = np.sum((Data.X - Data.X[objID])**2, axis=1)
            minDistVec = np.minimum(minDistVec, curDistVec)
            objID = discrete_single_draw(minDistVec, PRNG)
            chosenObjIDs.append(objID)
        resp = np.zeros((Data.nObs, K))
        for k in range(K):
            resp[chosenObjIDs[k], k] = 1.0
    elif initname == 'randsoftpartition':
        # Randomly assign all data items some mass in each of K components
        # then create component params by M-step given that soft partition
        resp = PRNG.rand(Data.nObs, K)
        resp = resp / np.sum(resp, axis=1)[:, np.newaxis]
    elif initname == 'randomnaive':
        # Generate K "fake" examples from the diagonalized data covariance,
        # creating params by assigning each "fake" example to a component.
        Sig = np.sqrt(np.diag(np.cov(Data.X.T)))
        Xfake = Sig * PRNG.randn(K, Data.dim)
        # The fake examples replace Data for the suff-stats step below.
        Data = XData(Xfake)
        resp = np.eye(K)
    else:
        # Fail with a clear message instead of an unbound-variable
        # error on resp further down.
        raise ValueError('Unrecognized initname: ' + str(initname))

    LP = dict(resp=resp)
    SS = hmodel.get_global_suff_stats(Data, LP)
    hmodel.update_global_params(SS)
Exemplo n.º 18
0
 def setUp(self):
     ''' Create a two-component ZMGauss obs model and data scaled to it.

     Sets self.obsM, self.C (rows drawn per component) and self.Data,
     then prints the generated data matrix.
     '''
     oDict = dict(inferType='EM', min_covar=0.0)
     compDictList = [dict(Sigma=np.eye(2)), dict(Sigma=100 * np.eye(2))]
     obsPrior = None
     self.obsM = ZMGaussObsModel.CreateWithAllComps(oDict, obsPrior,
                                                    compDictList)
     self.C = 10
     XList = list()
     for k in range(self.obsM.K):
         Xcur = np.random.randn(self.C, 2)
         # Scale standard normal draws by the square root of the
         # component's first diagonal covariance entry.
         sig = np.sqrt(self.obsM.comp[k].Sigma[0, 0])
         XList.append(sig * Xcur)
     self.Data = XData(X=np.vstack(XList))
     # Call form of print: valid and identical on Python 2 and Python 3.
     print(self.Data.X)
Exemplo n.º 19
0
    def loadDataForBatch(self, batchID):
        ''' Load the data assigned to a particular batch

        The file path is looked up in self.datafileList. A path ending
        in '.ldac' is read as BagOfWordsData; otherwise
        self.dataset_type selects GroupXData or (by default) XData.
        self.DataInfo is passed as keyword args to the loader.

        Returns
        -------
        Dchunk : bnpy.data.DataObj subclass
        '''
        path = self.datafileList[batchID]
        if path.endswith('.ldac'):
            loader = BagOfWordsData.LoadFromFile_ldac
        elif self.dataset_type == 'GroupXData':
            loader = GroupXData.LoadFromFile
        else:
            loader = XData.read_file
        return loader(path, **self.DataInfo)
Exemplo n.º 20
0
def loadDataForSlice(filepath='', dataset_type='', **kwargs):
    """ Return data object loaded from specific file.

    A path ending in '.ldac' is read as BagOfWordsData; otherwise
    dataset_type selects GroupXData or (by default) XData.
    All remaining keyword args are forwarded to the loader.

    Keyword args
    ------------
    workerID
    nWorkers
    """
    if filepath.endswith('.ldac'):
        loader = BagOfWordsData.LoadFromFile_ldac
    elif dataset_type == 'GroupXData':
        loader = GroupXData.LoadFromFile
    else:
        loader = XData.LoadFromFile
    return loader(filepath, **kwargs)
Exemplo n.º 21
0
 def MakeData(self, N=10000):
     ''' Build data from four zero-mean components with rotated covariance.

     The base covariance [[100, 0], [0, 0.01]] is rotated by multiples
     of pi/4 (0 through 3) to form the four components. N points are
     drawn per component.

     Sets self.Sigma (2 x 2 x 4), self.Data and self.trueresp
     (one-hot rows, stacked in component order).
     '''
     base = np.asarray([[100, 0], [0, 0.01]])
     Sigma = np.zeros((2, 2, 4))
     Sigma[:, :, 0] = base
     for step in (1, 2, 3):
         Sigma[:, :, step] = RandUtil.rotateCovMat(
             base, theta=step * np.pi / 4)
     self.Sigma = Sigma
     samples = []
     assignments = []
     nComp = Sigma.shape[2]
     for k in range(nComp):
         drawn = RandUtil.mvnrand([0, 0], Sigma[:, :, k], N)
         onehot = np.zeros((N, 4))
         onehot[:, k] = 1.0
         samples.append(drawn)
         assignments.append(onehot)
     self.Data = XData(X=np.vstack(samples))
     self.trueresp = np.vstack(assignments)
Exemplo n.º 22
0
  def setUp(self, K=7):
    ''' Create random data, and a K component MixModel to go with it

        The original model is stored as self.hmodel. A copy of it is
        initialized ('randexamples', seed 0, K components), saved via
        ModelWriter.save_model to '/tmp/' with the name 'Test', and
        then stored as self.modelB.
    '''
    self.K = K
    rng = np.random.RandomState(867)
    self.Data = XData(X=rng.randn(100, 2))

    allocPriorArgs = {'alpha0': 1.0}
    obsPriorArgs = {'min_covar': 1e-9}
    self.hmodel = HModel.CreateEntireModel(
        'EM', 'MixModel', 'ZMGauss',
        allocPriorArgs, obsPriorArgs, self.Data)
    savedModel = self.hmodel.copy()
    savedModel.init_global_params(
        self.Data, initname='randexamples', seed=0, K=self.K)
    ModelWriter.save_model(savedModel, '/tmp/', 'Test')
    self.modelB = savedModel
Exemplo n.º 23
0
def get_data(seed=8675309, nObsTotal=25000, **kwargs):
  ''' Generate a labeled dataset and wrap it in an XData object.

    Args
    -------
    seed : integer seed forwarded to generateData
    nObsTotal : total number of observations, forwarded to generateData
    **kwargs : accepted but ignored

    Returns
    -------
      Data : bnpy XData object built from the generated X and TrueZ,
             with its summary attribute filled in
  '''
  observations, labels = generateData(seed, nObsTotal)
  dataset = XData(X=observations, TrueZ=labels)
  dataset.summary = get_data_info()
  return dataset
Exemplo n.º 24
0
def get_minibatch_iterator(seed=8675309, nObsTotal=25000, **kwargs):
  ''' Generate a labeled dataset and wrap it in a MinibatchIterator.

    Args
    -------
    seed : integer seed forwarded to generateData
    nObsTotal : total number of observations, forwarded to generateData
    **kwargs : forwarded unchanged to MinibatchIterator

    Returns
    -------
      bnpy MinibatchIterator object whose summary attribute is filled in
  '''
  observations, labels = generateData(seed, nObsTotal)
  dataset = XData(X=observations, TrueZ=labels)
  iterator = MinibatchIterator(dataset, **kwargs)
  # The summary is attached to the iterator itself, not to the dataset.
  iterator.summary = get_data_info()
  return iterator
Exemplo n.º 25
0
 def setUp(self):
     ''' Create random 100 x 3 data and a minibatch iterator over it.

     Sets self.Data and self.DataIterator (nBatch=10, nLap=10).
     '''
     self.Data = XData(X=np.random.randn(100, 3))
     iteratorOptions = {'nBatch': 10, 'nLap': 10}
     self.DataIterator = MinibatchIterator(self.Data, **iteratorOptions)
Exemplo n.º 26
0
def get_data(seed=8675309, nObsTotal=25000, **kwargs):
    ''' Generate a labeled dataset and wrap it in an XData object.

    Args
    -------
    seed : integer seed forwarded to generateData
    nObsTotal : total number of observations, forwarded to generateData
    **kwargs : accepted but ignored

    Returns
    -------
    Data : bnpy XData object built from the generated X and TrueZ,
        with its name and summary attributes filled in
    '''
    observations, labels = generateData(seed, nObsTotal)
    dataset = XData(X=observations, TrueZ=labels)
    dataset.name = get_short_name()
    dataset.summary = get_data_info()
    return dataset