def test_mixedLogistic_train_tribolium(self):
    """Smoke test: train the mixed logistic model on the Tribolium data."""
    # Prepare data
    data = prepareTriboliumData()
    # Initial beta/alpha matrices, flattened into one vector for the optimizer
    b_init, a_init = set_initial(data['c'],
                                 data['xm'].shape[1],
                                 data['xr'].shape[1])
    theta0 = mapOptimizationMatrices2Vector(b_init, a_init)
    # Estimation
    res = mixedLogistic_train(theta0, data['c'], data['y'],
                              data['xm'], data['xr'], data['m'])
    # Map the flat estimate back to beta/alpha matrix form
    mapOptimizationVector2Matrices(res.x, data['c'], data['xm'].shape[1])
def test_mixedLogistic_QNtrain(self): # prepare data data = prepareTriboliumData() # set initial beta0, alpha0 = set_initial(data['c'], data['xm'].shape[1], data['xr'].shape[1]) vBetaAlpha0 = mapOptimizationMatrices2Vector(beta0, alpha0) nollk = nollkForOptimization(data['c'], data['y'], data['xm'], data['xr'], data['m']) res = mixedLogistic_QNtrain(nollk, vBetaAlpha0) print '\n' print 'parameter estimates =\n', res.x print 'minimum fval =', res.fun mapOptimizationVector2Matrices(res.x, data['c'], data['xm'].shape[1])
def test_minimize_negLogLikelihoodForOptimize_tribolium(self):
    """Run scipy's BFGS minimizer directly on the likelihood objective."""
    # Read data
    df = import_data()
    xm = pd.get_dummies(df.Replicate).ix[:, 2:]
    xr = pd.get_dummies(df.Species).ix[:, 1:]
    m = np.matrix(df.Total).T
    y = np.matrix(df.Remaining).T
    # Pre-process: prepend a column of 1s to both design matrices
    xm = addIntercept(xm)
    xr = addIntercept(xr)
    c = 3  # three replicates as three components
    # Initial beta/alpha matrices, flattened into one vector for the optimizer
    b_init, a_init = set_initial(c, xm.shape[1], xr.shape[1])
    theta0 = mapOptimizationMatrices2Vector(b_init, a_init)
    # NOTE(review): theta0 is passed both as x0 and as the first element of
    # args -- confirm nellkForOptimization's signature really expects the
    # parameter vector twice.
    res = sp.optimize.minimize(nellkForOptimization,
                               x0=theta0,
                               args=(theta0, c, y, xm, xr, m),
                               method='BFGS')
def test_mixedLogistic_EMtrain_evaluation_tribolium(self): # --- prepare data --- data = prepareTriboliumData() # --- set initial --- beta0, alpha0 = set_initial(data['c'], data['xm'].shape[1], data['xr'].shape[1]) vBetaAlpha0 = mapOptimizationMatrices2Vector(beta0, alpha0) # vBetaAlpha0 = np.repeat(1., 15) # --- EM --- # param, fval, nIter = mixedLogistic_EMtrain(vBetaAlpha0, c, y, xm, xr, m) res = mixedLogistic_EMtrain(vBetaAlpha0, data['c'], data['y'], data['xm'], data['xr'], data['m']) # param in matrix form beta, alpha = mapOptimizationVector2Matrices(res['param'], data['c'], data['xm'].shape[1]) print 'beta =' print beta print 'alpha =' print alpha
def test_mixedLogistic_Mstep_evaluation_tribolium(self): # read data df = import_data() xm = pd.get_dummies(df.Replicate).ix[:, 2:] xr = pd.get_dummies(df.Species).ix[:, 1:] m = np.matrix(df.Total).T y = np.matrix(df.Remaining).T # pre-process data xm = addIntercept(xm) # add a leading 1s column xr = addIntercept(xr) # add a leading 1s column c = 3 # three replicates as three components # set initial beta0, alpha0 = set_initial(c, xm.shape[1], xr.shape[1]) vBetaAlpha0 = mapOptimizationMatrices2Vector(beta0, alpha0) # E-step qfn = mixedLogistic_Estep(vBetaAlpha0, c, y, xm, xr, m) # M-step param, fval = mixedLogistic_Mstep(qfn, vBetaAlpha0) print '\n' print 'parameter estimates = \n', param print 'function value = ', fval print '\n'
c_candidates = np.arange(10) + 1 # candidates for number of hidden groups # It is tuned by cross-validation nfolds = 10 # 'nfolds' cross-validation batchSize = trainAndValidateXm.shape[0] / nfolds # params = [] print('--- Start %d-fold cross-validation ---' % (nfolds)) avgValidateScores = [] for c in c_candidates: print(" \tCurrent number of hidden groups 'c' = %d " % (c)) validateScores = [] for batchIndex in xrange(nfolds): print('\t\t Batch %d ...' % (batchIndex)) # Initial parameters random.seed(25) b0, a0 = set_initial(c, data.dxm + 1, data.dxr + 1) param0 = mapOptimizationMatrices2Vector(b0, a0) validateIds = np.arange(batchIndex * batchSize, (batchIndex + 1) * batchSize) trainIds = list(set(np.arange(trainAndValidateXm.shape[0])) - set(validateIds)) res = mixedLogistic_EMtrain(param0=param0, c=c, y=np.matrix(trainAndValidateY[trainIds]).T, xm=trainAndValidateXm[trainIds, :], xr=trainAndValidateXr[trainIds, :], m=1) pred = mixedLogistic_pred(res=res, xmt=trainAndValidateXm[validateIds, :], xrt=trainAndValidateXr[validateIds, :], c=c,