def old_main():
    global feature_selector
    # feature_selector = tuple(range(15)) + tuple(range(16,23))  # all
    feature_selector = tuple(range(11)) + tuple(range(16,19))    # head and hand position
    # feature_selector = tuple(range(8))                         # only the head
    motion_type = 'learn'
    score_col = 259      # 259-learn_score, 151-learn_srate, 302-prac_score, 433-prac_srate
    score_format = int   # score format function
    out_file = 'learn_score_{}.pkl'.format(len(feature_selector))
    print('Output filename is {}'.format(out_file))

    N = 62  # number of rows
    workbook = xlrd.open_workbook(resultFile)
    sheet = workbook.sheet_by_index(0)
    # retrieve the uid and score from excel
    uids = [sheet.cell(i,0).value for i in range(1,N)]
    scores = [score_format(sheet.cell(i,score_col).value) for i in range(1,N)]

    dataset = []
    for i,x in enumerate(uids):
        utils.progress(i+1,N)
        movdata = load_user_motion(x,motion_type)
        if movdata is not None:
            dataset.append( (x,movdata,scores[i]) )
    utils.pickle_dump(dataset,out_file)

def extract_cvx_feature(X, inds, verbose=False):
    # represent every sample by the coefficients returned by distance_p2ch
    # against the base set B = X[inds, :]
    B = X[inds, :]
    ret = []
    for i, item in enumerate(X):
        if verbose:
            progress(i + 1, X.shape[0], "Extracting Feature")
        ret.append(distance_p2ch(item, B, EPS=1e-2)[1])
    return np.array(ret)

def chowliu_tree(data):
    '''
    Learn a Chow-Liu tree structure from the given data.
    data: S*N numpy array, where S is #samples and N is #RVs (discrete)
    '''
    _,D = data.shape
    marginals = {}
    totalnum = D + (D*(D-1))//2
    nownum = 0

    # compute single r.v. marginals
    for i in range(D):
        nownum += 1
        utils.progress(nownum,totalnum,'Learning chowliu tree')
        values, counts = np.unique(data[:,i], return_counts=True)
        marginals[i] = dict(zip(values, counts))

    # compute joint marginal for each pair
    for i,j in utils.halfprod(range(D)):
        nownum += 1
        utils.progress(nownum,totalnum,'Learning chowliu tree')
        values, counts = np.unique(data[:,(i,j)], axis=0, return_counts=True)
        values = list(map(tuple, values))
        marginals[i,j] = dict(zip(values, counts))
        allcomb = utils.crossprod(list(marginals[i].keys()), list(marginals[j].keys()))
        for v in allcomb:
            if v not in marginals[i,j]:
                marginals[i,j][v] = 0

    # normalize all marginals (+1 add-one correction)
    for key in marginals:
        dist = marginals[key]
        summation = sum(dist.values())
        for k in dist:
            dist[k] = (dist[k]+1) / float(summation)

    # compute mutual information for each pair
    mutual = {}
    for i,j in utils.halfprod(range(D)):
        mutual[i,j] = 0
        for vi,vj in marginals[i,j]:
            mutual[i,j] += np.log(marginals[i,j][vi,vj] / (marginals[i][vi] * marginals[j][vj])) * marginals[i,j][vi,vj]

    # find the maximum spanning tree over the mutual-information weights
    G = Graph(digraph=False)
    for i in range(D):
        node = Node('N{}'.format(i))
        node.domain = list(marginals[i].keys())
        G.add_vertice(node)
    for i,j in mutual:
        G.add_edge(i,j,weight=mutual[i,j])
    G = G.max_spanning_tree()

    # orient the tree from a root near the middle
    root = D//2
    G = G.todirect(root)
    return G

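# Usage sketch (illustrative only) for the discrete chowliu_tree above. It assumes the
# surrounding module's `utils`, `Graph`, and `Node` imports are available; the demo
# function name and the synthetic data are made up for demonstration.
def _demo_chowliu_tree_discrete():
    rng = np.random.default_rng(0)
    toy = rng.integers(0, 3, size=(200, 5))  # 200 samples, 5 discrete variables over {0,1,2}
    tree = chowliu_tree(toy)                 # directed tree rooted at D//2
    return tree
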
def main(args):
    score_col = 259      # 259-learn_score, 151-learn_srate, 302-prac_score, 433-prac_srate
    score_format = int   # score format function
    # ==============================================
    rootdir = args[0]
    mov_type = args[1]
    out_file = args[2]
    resultFile = f'{rootdir}/results.xlsx'

    N = 62  # number of rows
    workbook = xlrd.open_workbook(resultFile)
    sheet = workbook.sheet_by_index(0)
    uids = [sheet.cell(i,0).value for i in range(1,N)]
    scores = [score_format(sheet.cell(i,score_col).value) for i in range(1,N)]

    # head pos + orientation, lhand pos + orientation, skip (lbutton), rhand pos + orientation
    selector = tuple(range(14)) + tuple(range(15,22))

    dataset = []
    for i,u in enumerate(uids):
        utils.progress(i+1,N)
        motion_dir = '{}/mov_{}'.format(rootdir,mov_type)
        # find the motion file of that user
        candidates = glob( '{}/{}_*.csv'.format(motion_dir,u) )
        if len(candidates) < 1:
            print('User {} does not have motion data'.format(u))
        else:
            assert(len(candidates) == 1)
            mov = utils.read_text(candidates[0],header=True)
            data = np.array(mov)
            data = data[:,selector]
            data = data.astype(float)
            # drop leading rows whose values are all (near) zero, i.e. invalid tracking data
            valid = 0
            EPS = 1e-20
            while 1:
                row = data[valid,:]
                if np.all(np.abs(row) > EPS):
                    break
                valid += 1
            if valid != 0:
                print('Eliminated {} rows in {}'.format(valid,candidates[0]))
            data = data[valid:,:]
            dataset.append( (u,data,scores[i]) )
    utils.pickle_dump(dataset,out_file)

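# Entry-point sketch (assumption: this script is invoked directly with three positional
# arguments, e.g. `python <this_script>.py <rootdir> <mov_type> <out_file>`; main() above
# only reads args[0:3]).
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])
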
def find_cvx_hull(X, k, verbose=False):
    N, F = X.shape
    if N <= F:
        print('WARN: #Samples less than #Features')
    ###################
    ratio = 0.1
    pct = 0.1
    multiple = 3.0
    ###################
    candidates = np.array(list(range(X.shape[0])))
    idxes = [0]
    init_dist = -1
    last_dist = -1
    t_start = time.time()
    for i in range(k):
        if verbose:
            progress(i + 1, k, "Calculating CVX Hull")
        B = X[idxes, :]
        # pick the candidate farthest from the current hull
        D = np.array([distance_p2ch(item, B)[0] for item in X[candidates, :]])
        ind = np.argmax(D)
        idxes.append(candidates[ind])
        maxdis = D[ind]
        # prune candidates that are already close to the current hull
        thresh = max(ratio * maxdis, np.percentile(D, pct * 100))
        selector = D > thresh
        if np.sum(selector) > multiple * (k - i):
            candidates = candidates[selector]
        if i == 0:
            init_dist = maxdis
        else:
            last_dist = maxdis
    t_end = time.time()
    if verbose:
        print('Time:{:.2f} secs DeductionRatio:{:.2f}'.format(
            t_end - t_start, 1 - last_dist / init_dist))
    return idxes

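# Usage sketch (illustrative only): pick k hull vertices and re-encode every sample
# relative to them. Assumes `distance_p2ch` from this module behaves as used above;
# the demo function name and the random data are made up.
def _demo_cvx_features(k=10):
    rng = np.random.default_rng(0)
    X = rng.normal(size=(500, 20))            # 500 samples, 20 features
    hull_idx = find_cvx_hull(X, k)            # indices of (approximate) hull vertices
    feats = extract_cvx_feature(X, hull_idx)  # per-sample convex-combination features
    return hull_idx, feats
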
def chowliu_tree(data):
    '''Learn a Chow-Liu tree for continuous data, using the pairwise correlation
    coefficient as the edge weight.'''
    N,D = data.shape
    maxN = (D*(D-1))//2
    curN = 0
    g = Graph(digraph=False)
    for i in range(D):
        n = Node('x{}'.format(i))
        g.add_vertice(n)
    # visit each unordered pair (i,j) once, matching the maxN count above
    allpair = halfprod(range(D))
    for i,j in allpair:
        curN += 1
        progress(curN,maxN,'Calculate mutual info')
        mu = np.mean(data[:,(i,j)],axis=0)
        var = np.cov(data[:,(i,j)],rowvar=False)
        coef = var[0,1] / np.sqrt(var[0,0]*var[1,1])
        # mutual = - np.log(1-coef*coef)
        g.add_edge(i,j,weight=coef)
    g = g.max_spanning_tree()
    g = g.todirect(0)
    return g

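# Usage sketch (illustrative only): build a correlation-weighted tree over continuous
# features. Assumes the module-level `Graph`, `Node`, `halfprod`, and `progress`
# imports used above; the demo function name and the Gaussian data are synthetic.
def _demo_chowliu_tree_gaussian():
    rng = np.random.default_rng(0)
    cont = rng.normal(size=(300, 6))  # 300 samples, 6 continuous variables
    return chowliu_tree(cont)         # directed tree rooted at node 0
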
def fit(self,traindata):
    # traindata - list of 2D numpy arrays (discrete, integer-coded observations)
    M = len(traindata)
    for i in range(M):
        progress(i+1,M,'DBN learning')
        data = traindata[i]
        T,N = data.shape
        assert(N == self.G.N)
        # basically learning the empirical distribution
        for t in range(T):
            now = data[t,:]
            if t == 0:
                # initial-slice counts for the state variables
                for j in self.SV:
                    idx = tuple(now[self.ICPT[j].ids])
                    self.ICPT[j].P[idx] += 1
            else:
                # extend the current slice with the previous slice's state values
                prev = data[t-1,:]
                exnow = np.append(now,[0 for _ in self.SV])
                for k,v in self.M.items():
                    exnow[v] = prev[k]
                for j in self.SV:
                    idx = tuple(exnow[self.CPT[j].ids])
                    self.CPT[j].P[idx] += 1
                for j in self.EV:
                    idx = tuple(now[self.CPT[j].ids])
                    self.CPT[j].P[idx] += 1
    # normalize all CPTs
    for i in range(self.G.N):
        self.norm_CPT(self.CPT[i])
    for i in self.SV:
        self.norm_CPT(self.ICPT[i])
    return

def predict(self,testdata,it=0):
    T,D = testdata.shape
    assert(D == self.g.N), "Invalid test data"
    if it <= 0:
        it = int(1.1*D*T)
    fg = self.construct_factor_graph(T)
    edgeset = fg.get_edges()
    # pre-find the neighbors of each RV node
    for i in range(fg.N):
        node = fg.V[i]
        if node.type == 'RV':
            node.nb = fg.find_neighbor(i)
    # initialize all the messages
    message = {}
    for i,j in edgeset:
        message[i,j] = (1.0,1.0)
    # loopy Gaussian belief propagation (GaBP)
    for xx in range(it):
        progress(xx+1,it,'Message Passing')
        for i,j in edgeset:
            ni,nj = fg.V[i],fg.V[j]
            # no need to pass messages to an evidence node
            if (nj.type == 'RV') and ((j%D) in self.ev):
                continue
            ############
            if ni.type == 'RV' and nj.type == 'FN':
                if (i%D) in self.ev:
                    vi = testdata[i//D,i%D]
                    message[i,j] = (vi,1e+8)
                else:
                    X0,P0 = (0,0)
                    if ni.P != 'na':
                        mu,P = ni.P
                        X0 += mu*P
                        P0 += P
                    for n in ni.nb:
                        if n == j:
                            continue
                        mu,P = message[n,i]
                        X0 += mu*P
                        P0 += P
                    assert(P0 != 0), "Leaf node must be EV"
                    message[i,j] = (X0/P0,P0)
            elif ni.type == 'FN' and nj.type == 'RV':
                ids = deepcopy(ni.ids)
                b,P = deepcopy(ni.P)
                # reorder so the target variable j comes first
                if j != ids[0]:
                    idx = ids.index(j)
                    ids[0],ids[idx] = ids[idx],ids[0]
                    b[1],b[idx+1] = b[idx+1],b[1]
                vmu = []
                vP = [P]
                for nid in ids[1:]:
                    mu,P = message[nid,i]
                    vmu.append(mu)
                    vP.append(P)
                # calculate mu
                summation = 0.0
                for s,v in enumerate(vmu):
                    summation += v*b[s+2]
                mu = -(summation+b[0])/b[1]
                # calculate P
                product = np.prod(vP)
                numerator = b[1]*b[1]*product
                denominator = product/vP[0]
                for s in range(2,len(b)):
                    denominator += b[s]*b[s]*product/vP[s-1]
                P = numerator/denominator
                message[i,j] = (mu,P)
            else:
                assert(False), "Invalid factor graph!"
    # calculate the marginal distribution of each state variable
    prediction = deepcopy(testdata)
    for i in range(fg.N):
        node = fg.V[i]
        if node.type == 'RV' and (i%D) in self.sv:
            if node.P == 'na':
                X0,P0 = 0,0
            else:
                mu,P = node.P
                X0 = mu*P
                P0 = P
            for n in node.nb:
                mu,P = message[n,i]
                X0 += mu*P
                P0 += P
            prediction[i//D,i%D] = X0/P0
    return prediction

def smooth(self,data,numnodes=4,smooth=True):
    assert(numnodes > 1)
    # partition the state variables into approximation clusters of size numnodes
    st = 0
    appro = []
    while st < len(self.SV):
        ed = st + numnodes
        if ed > len(self.SV):
            ed = len(self.SV)
        appro.append(self.SV[st:ed])
        st = ed

    # create junction tree J1 (first time slice)
    T1G = deepcopy(self.G)
    T1G = T1G.moralize()
    for bkc in appro:
        for s,t in crossprod(bkc,bkc):
            T1G.add_edge(s,t)
    self.J1 = T1G.junction_tree(preserve=self.G)
    # find come and out nodes
    self.J1.out = []
    for bkc in appro:
        self.J1.out.append( self.min_clique(self.J1,bkc) )
    self.J1.come = deepcopy(self.J1.out)

    # create junction tree Jt (transition slices)
    T2G = self.G2.moralize()
    for bkc in appro:
        for s,t in crossprod(bkc,bkc):
            T2G.add_edge(s,t)
        fbkc = list(map(lambda x:self.M[x],bkc))
        for s,t in crossprod(fbkc,fbkc):
            T2G.add_edge(s,t)
    self.J2 = T2G.junction_tree(preserve=self.G2)
    # find come and out nodes
    self.J2.out = []
    for bkc in appro:
        self.J2.out.append( self.min_clique(self.J2,bkc) )
    self.J2.come = []
    for bkc in appro:
        fbkc = list(map(lambda x:self.M[x],bkc))
        self.J2.come.append( self.min_clique(self.J2,fbkc) )

    T,N = data.shape
    assert(N == self.G.N)

    # forward pass
    fmsg = {}
    for t in range(T):
        progress(t+1,T,'Forward')
        fmsg[t] = {}
        evidence = data[t,:]
        if t == 0:
            self.init_message(self.J1,fmsg[t])
            self.multiply_CPT(self.J1,evidence,fmsg[t],init=True)
            # collect message to the out node of each BK cluster
            npt = deepcopy(fmsg[t])
            message = self.calculate_msg(self.J1,npt)
            for i in self.J1.out:
                fmsg[t][i] = self.collect_msg(self.J1,i,npt,message)
        else:
            pt = t-1
            self.init_message(self.J2,fmsg[t])
            self.multiply_CPT(self.J2,evidence,fmsg[t])
            # absorb messages from the previous time slice
            for i,inid in enumerate(self.J2.come):
                if pt == 0:
                    outid = self.J1.out[i]
                else:
                    outid = self.J2.out[i]
                msg = self.get_message(fmsg[pt][outid],fmsg[t][inid],timestep=1)
                fmsg[pt][outid,-1] = msg
                fmsg[t][inid] = self.multiply_potential(msg,fmsg[t][inid])
            npt = deepcopy(fmsg[t])
            message = self.calculate_msg(self.J2,npt)
            for i in self.J2.out:
                fmsg[t][i] = self.collect_msg(self.J2,i,npt,message)
        if t == (T-1):
            for i,outid in enumerate(self.J2.out):
                inid = self.J2.come[i]
                fmsg[t][outid,-1] = self.get_message(fmsg[t][outid],fmsg[t][inid],timestep=1)

    # backward pass (only when smoothing; otherwise filtering only)
    if smooth:
        endtime = -1
    else:
        endtime = T
    bmsg = {}
    for t in range(T-1,endtime,-1):
        progress(T-t,T,'Backward')
        bmsg[t] = {}
        evidence = data[t,:]
        if t == (T-1):
            curG = self.J2
            self.init_message(curG,bmsg[t])
            self.multiply_CPT(curG,evidence,bmsg[t])
            npt = deepcopy(bmsg[t])
            message = self.calculate_msg(curG,npt)
            for i,inid in enumerate(curG.come):
                bmsg[t][inid] = self.collect_msg(curG,inid,npt,message)
                outid = curG.out[i]
                bmsg[t][-1,outid] = self.init_potential(appro[i])
        if t < (T-1):
            nt = t+1
            curG = self.J2
            if t == 0:
                curG = self.J1
            # initialize messages
            self.init_message(curG,bmsg[t])
            if t == 0:
                self.multiply_CPT(curG,evidence,bmsg[t],init=True)
            else:
                self.multiply_CPT(curG,evidence,bmsg[t])
            # absorb messages from the next time slice
            for i,outid in enumerate(curG.out):
                inid = self.J2.come[i]
                msg = self.get_message(bmsg[nt][inid],bmsg[t][outid],timestep=-1)
                bmsg[t][-1,outid] = msg
                bmsg[t][outid] = self.multiply_potential(msg,bmsg[t][outid])
            npt = deepcopy(bmsg[t])
            message = self.calculate_msg(curG,npt)
            for i in curG.come:
                bmsg[t][i] = self.collect_msg(curG,i,npt,message)

    # combine forward and backward messages, then take the MAP assignment per cluster
    prediction = deepcopy(data)
    for t in range(T):
        if t == 0:
            tg = self.J1
        else:
            tg = self.J2
        for bki,outid in enumerate(tg.out):
            fP = fmsg[t][outid,-1]
            fP.ids = list(map(lambda x:self.rM[x],fP.ids))
            potential = fP
            if smooth:
                bP = bmsg[t][-1,outid]
                potential = self.multiply_potential(potential,bP)
            P = potential.P/np.sum(potential.P)
            idx = np.unravel_index(P.argmax(), P.shape)
            for v in appro[bki]:
                prediction[t,v] = idx[fP.ids.index(v)]
    return prediction