def main():
    ### Undirected graph ###
    # Initialize model using the Petersen graph
    model=gmm.gmm(nx.petersen_graph())
    old_graph=model.get_base()
    model.set_termination(node_ceiling)
    model.set_rule(rand_add)
    # Run simulation with tau=4 and Poisson density for motifs
    gmm.algorithms.simulate(model,4)
    # View results
    new_graph=model.get_base()
    print(nx.info(new_graph))
    # Draw graphs
    old_pos=nx.spring_layout(old_graph)
    new_pos=nx.spring_layout(new_graph,iterations=2000)
    fig1=plt.figure(figsize=(15,7))
    fig1.add_subplot(121)
    #fig1.text(0.1,0.9,"Base Graph")
    nx.draw(old_graph,pos=old_pos,node_size=25,with_labels=False)
    fig1.add_subplot(122)
    #fig1.text(0.1,0.45,"Simulation Results")
    nx.draw(new_graph,pos=new_pos,node_size=20,with_labels=False)
    fig1.savefig("undirected_model.png")
    ### Directed graph ###
    # Initialize model using random directed Barabasi-Albert model
    directed_base=nx.barabasi_albert_graph(25,2).to_directed()
    directed_model=gmm.gmm(directed_base)
    directed_model.set_termination(node_ceiling)
    directed_model.set_rule(rand_add)
    # Run simulation with tau=4 and Poisson density for motifs
    gmm.algorithms.simulate(directed_model,4)
    # View results
    new_directed=directed_model.get_base()
    print(nx.info(new_directed))
    # Draw directed graphs
    old_dir_pos=nx.spring_layout(directed_base)
    new_dir_pos=nx.spring_layout(new_directed,iterations=2000)
    fig2=plt.figure(figsize=(7,10))
    fig2.add_subplot(211)
    fig2.text(0.1,0.9,"Base Directed Graph")
    nx.draw(directed_base,pos=old_dir_pos,node_size=25,with_labels=False)
    fig2.add_subplot(212)
    fig2.text(0.1,0.45,"Simulation Results")
    nx.draw(new_directed,pos=new_dir_pos,node_size=20,with_labels=False)
    fig2.savefig("directed_model.png")
    # Export files
    nx.write_graphml(model.get_base(), "base_model.graphml")
    nx.write_graphml(directed_model.get_base(), "directed_model.graphml")
    nx.write_graphml(nx.petersen_graph(), "petersen_graph.graphml")
def gmm_test(dt, tf, mux0, P0, YK, Qk, Rk, Nsu=20, flag_informative=True): global nameBit # add in this functionality so we can change the propagation function dependent on the nameBit ... may or may not be needed if not flag_informative: measure_argument = measurement_uninformative measure_jacobian = measurement_jac_uninformative else: measure_argument = measurement_enkf measure_jacobian = measurement_jac_gmm if nameBit == 1: # create EnKF object GMM = gmm.gmm(2, Nsu, Qk, Rk, eqom_gmm, jac_gmm, process_influence, measure_argument, measure_jacobian) elif nameBit == 2: # create EnKF object GMM = gmm.gmm(2, Nsu, Qk, Rk, eqom_gmm, jac_gmm, process_influence, measure_argument, measure_jacobian) elif nameBit == 3: # create EnKF object GMM = gmm.gmm(2, Nsu, Qk, Rk, eqom_gmm, jac_gmm, process_influence, measure_argument, measure_jacobian) nSteps = int(tf / dt) + 1 ts = 0.0 # initialize EnKF GMM.init_monte(mux0, P0, ts) xml = np.zeros((nSteps, 2)) pdf = np.zeros((nSteps, GMM.aki.shape[1])) pdfPts = np.zeros((nSteps, 2, GMM.aki.shape[1])) alphai = np.zeros((nSteps, GMM.aki.shape[1])) Pki = np.zeros((nSteps, 2, 2, GMM.aki.shape[1])) tk = np.arange(0.0, tf, dt) t1 = time.time() fig = [] for k in range(0, nSteps): if k > 0: # get the new measurement ym = np.array([YK[k]]) ts = ts + dt # sync the ENKF, with continuous-time integration print("Propagate to t = %f" % (ts)) # propagate filter GMM.propagate_normal(dt) GMM.update(ym) # log alphai[k, :] = GMM.alphai.copy() xml[k, :] = GMM.get_max_likelihood() Pki[k, :, :, :] = GMM.Pki.copy() (pdfPts[k, :, :], pdf[k, :]) = GMM.get_pdf() if k > 0: GMM.resample() t2 = time.time() print("Elapsed time: %f sec" % (t2 - t1)) return (xml, pdf, pdfPts, alphai, Pki)
def plot_gmm(lib=__library__, alg=__algs__[0], G=1, draw=True, show=True, draw_hist=True, save=False, xlim=None, print_result=True, training=True): # part = 'training' if training else 'testing' # if G == 0: # model = gmm(texts = True, gmm_txt='%s/%s/gmm_%s_%d.npz' % (lib, part, alg, G+1), hist_txt=('%s/%s/hist_%s_%d.npz' % (lib, part, alg, G+1))) # else: # model = gmm(texts = True, gmm_txt='%s/%s/gmm_%s_%d.npz' % (lib, part, alg, G), hist_txt=('%s/%s/hist_%s_%d.npz' % (lib, part, alg, G))) if G == 0: model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G+1), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G+1))) else: model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G))) x = model.bins bin_width = x[1] - x[0] x = np.linspace(model.bins[0], model.bins[-1], num=10000) y = None if G != 0: y = [model.pdf_integral(xi - bin_width/2., xi + bin_width/2.) for xi in x] abc = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] G_code = abc[G] if G != 0: leg = ['Error hist', 'GMM'][::-1] else: leg = ['Error hist'] if save: leg = ['\ss{%s}' % l for l in leg] # if G == 0: leg = None # images.save_latex(x, y, leg, '%s_%s_gmm_%s_%s' % (lib, part, alg, G_code), 'Error', 'Relative weight', 0.48, xlim=xlim, model=model, ylim=None) images.save_latex(x, y, leg, '%s_tmp_gmm_%s_%s' % (lib, alg, G_code), 'Error', 'Relative weight', 0.48, xlim=xlim, model=model, ylim=None) if draw: if draw_hist: pp.figure() pp.bar(model.bins[:-1], model.count) pp.hold(True) if xlim is not None: pp.xlim(xlim) # pp.stem(x, y, markerfmt='xr', linefmt='r-') if G != 0: pp.plot(x, y, color='r', linewidth=3) # if draw_hist: # model.draw_hist(fig=True, limit_hist=False) # pp.hold() # pp.title("%s - %d gmm" % (alg, G)) # model.draw_gmm_hist(fig=False) # # if draw_hist: # pp.legend(['Histogram', 'GMM']) print lib, alg, G if print_result: model.print_results() if show: pp.show()
def plot_gmm(lib, alg=__algs__[0], G=1, draw=True, show=True, draw_hist=True, save=False, xlim=None, print_result=True): if G == 0: model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G+1), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G+1))) else: model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G))) x = model.bins model.count[range(np.where(x == -5)[0][0], np.where(x == 5)[0][0]+1)] = 0 bin_width = x[1] - x[0] x = np.linspace(model.bins[0], model.bins[-1], num=10000) y = None if G != 0: y = [model.pdf_integral(xi - bin_width/2., xi + bin_width/2.) for xi in x] abc = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] G_code = abc[G] if G != 0: leg = ['Error hist', 'GMM'][::-1] else: leg = ['Error hist'] if save: leg = ['\ss{%s}' % l for l in leg] # if G == 0: leg = None images.save_latex(x, y, leg, '%s_gmm_%s_%s_outliers' % (lib, alg, G_code), 'Error', 'Relative weight', 0.48, xlim=xlim, model=model, ylim=None) if draw: if draw_hist: pp.figure() pp.bar(model.bins[:-1], model.count) pp.hold(True) if xlim is not None: pp.xlim(xlim) # pp.stem(x, y, markerfmt='xr', linefmt='r-') if G != 0: pp.plot(x, y, color='r', linewidth=3) # if draw_hist: # model.draw_hist(fig=True, limit_hist=False) # pp.hold() # pp.title("%s - %d gmm" % (alg, G)) # model.draw_gmm_hist(fig=False) # # if draw_hist: # pp.legend(['Histogram', 'GMM']) print lib, alg, G if print_result: model.print_results() if show: pp.show()
def main(): # Load Zachary data, randomly delete nodes, and report zachary=nx.Graph(nx.read_pajek("karate.net")) # Do not want graph in default MultiGraph format zachary.name="Original Zachary Data" print(nx.info(zachary)) zachary_subset=rand_delete(zachary, 15) # Remove half of the structure zachary_subset.name="Randomly Deleted Zachary Data" print(nx.info(zachary_subset)) # Create model, and simulate zachary_model=gmm.gmm(zachary_subset,R=karate_rule,T=node_ceiling_34) gmm.algorithms.simulate(zachary_model,4,poisson=False,new_name="Simulation from sample") # Use tau=4 because data is so small (it's fun!) # Report and visualize print(nx.info(zachary_model.get_base())) fig=plt.figure(figsize=(30,10)) fig.add_subplot(131) nx.draw_spring(zachary,with_labels=False,node_size=45,iterations=5000) plt.text(0.01,-0.1,"Original Karate Club",color="darkblue",size=20) fig.add_subplot(132) nx.draw_spring(zachary_subset,with_labels=False,node_size=45,iterations=5000) plt.text(0.01,-0.1,"Random sample of Karate Club",color="darkblue",size=20) fig.add_subplot(133) nx.draw_spring(zachary_model.get_base(),with_labels=False,node_size=45,iterations=5000) plt.text(0.01,-0.1,"Simulation from random sample",color="darkblue",size=20) plt.savefig("zachary_simulation.png")
def dotest(k, max_iter): print('Model Number is ' + str(k)) input("Press Enter to Continue: ") for i in range(n_classes): train = trains[i] d = np.shape(train)[1] dparams = [None]*d for j in range(d): dparams[j] = gmm.gmm(k,[x[j] for x in train], max_iter) paramses[i] = dparams for i, params in enumerate(paramses): print("Class " + str(i)) for j, param in enumerate(params): print(" Dimension " + str(j)) for l, m in enumerate(param): print(' Model ' + str(l)) print(m) input('Params calculated, Press Enter to stat true rate: ') testlen = len(testA) + len(testB) rate1 = mylib.statTrueRate(testA, paramses, classifier, 0 ) / testlen rate2 = mylib.statTrueRate(testB, paramses, classifier, 1 ) / testlen print(rate1 + rate2)
def doit(dic,priors,classes,K,diag):
    err = {'train':list(), 'test':list()}
    for k in K:
        print '*'*15,'K =',str(k),'*'*15
        nums, means, covs, nll = {},{},{},{}
        # Build GMM models
        for dif in dic['train']:
            data = pack(dic['train'][dif])
            for i in xrange(6):
                _nums,_means,_covs,_nll = gmm.gmm(data, weights=None, K=k, hard=True, diagcov=diag)
                if(i != 0):
                    if(_nll > nll[dif]):
                        continue
                nums[dif],means[dif],covs[dif],nll[dif] = _nums,_means,_covs,_nll
        criteria = [snll for dif in dic['train']]
        kwparams = [{'nums':nums[dif], 'means':means[dif], 'covs':covs[dif], 'prior':priors[dif]} for dif in dic['train']]
        # Evaluate
        for x in dic:
            labels, labels_est = [], []
            for dif in dic[x]:
                points = dic[x][dif]
                labels += [dif for i in xrange(len(points))]
                labels_est += optcriterion(points, classes, criteria, kwparams=kwparams, _max=False)
            e = 100.0*sum( np.array(labels) != np.array(labels_est) ) / len(labels)
            err[x].append( e )
            print 'Confusion matrix for', x, 'data','(K={:},diagcov={:})'.format(k,diag)
            utils.confusion(labels, labels_est, True)
            print '% Error: ', e,'\n'
    if(len(K) > 1):
        pl.plot(K,err['train'],'--', label= 'Train'+(' (diagcov=True)' if diag else ''))
        pl.plot(K,err['test'], label= 'Test'+(' (diagcov=True)' if diag else ''))
def train_model():
    """
    Read all MFCC files at ./MFCC/ and use the coefficients to train models
    for each user. The model used is a Gaussian Mixture Model
    """
    models = []
    names = []
    for files in os.listdir(mfcc_path):
        print 'Training ' + files.split('.')[0] + "'s model"
        mfcc = np.loadtxt(mfcc_path + files)
        models.append(gmm.gmm(mfcc))
        names.append(files.split('.')[0])
    return models, names
def __init__(self, N, K):
    self.N = N
    self.K = K
    self.gmm = []
    for i in range(N):
        self.gmm.append(G.gmm(K, name='Class'+str(i)))
    self.con_mat = np.array([])
    self.recall = []
    self.meanrecall = 0.0
    self.precision = []
    self.meanprecision = 0.0
    self.fmeasure = []
    self.meanfmeasure = 0.0
    self.accuracy = 0.0
def binomial_simulation(graph_set, seed=None, verbose=False):
    """
    Given a set of binomial random graphs, simulate using above growth rule
    """
    simulated_graphs = list()
    # Iterate over graph set to produce several simulations
    for i in xrange(0, len(graph_set) - 1):
        # Required to change the growth rule dynamically,
        # so we define it inside the simulation's for-loop
        termination_size = graph_set[i + 1].number_of_nodes()

        def node_ceiling(G):
            if G.number_of_nodes() >= termination_size:
                return False
            else:
                return True

        # Setup GMM
        erdos_renyi_model = gmm.gmm(graph_set[i])
        erdos_renyi_model.set_termination(node_ceiling)
        erdos_renyi_model.set_rule(binomial_growth)
        sim_name = "GMM SIMULATION-" + erdos_renyi_model.get_base().name
        gmm.algorithms.simulate(erdos_renyi_model, tau=3, poisson=True, seed=seed, new_name=sim_name)  # Run simulation
        simulated_erdos_renyi = erdos_renyi_model.get_base()
        # Print the results to stdout
        if verbose:
            print(nx.info(graph_set[i + 1]))
            print(nx.info(simulated_erdos_renyi))
            print("")
        simulated_graphs.append(simulated_erdos_renyi)
    # Return a list of simulated graphs
    return simulated_graphs
def setUp(self):
    """Tests that all models from setup pass inspection."""

    # Node ceiling termination rule for 100 nodes
    def node_ceiling(G):
        if G.number_of_nodes() >= 100:
            return True
        else:
            return False

    # Simple random growth rule: connects random node from base to random node from new
    def rand_add(base, new):
        from numpy.random import randint
        new = nx.convert_node_labels_to_integers(new, first_label=max(base.nodes()) + 1)
        new_base = nx.compose(base, new)
        base_connector = randint(base.number_of_nodes())
        new_connector = randint(min(new.nodes()), max(new.nodes()))
        new_base.add_edge(base_connector, new_connector)
        return new_base

    # This model should pass without exception
    self.full_model = gmm.gmm(self.five_cycle, T=node_ceiling, R=rand_add)
def watts_strogatz_simulation(test_graph, k, p, seed=None, verbose=False):
    """
    Given a set of Watts-Strogatz "small-world" random graphs, simulate
    using above growth rule
    """

    # Required to change the growth rule dynamically, so
    # we define it inside the simulation function
    def watts_strogatz_growth(base, new):
        """
        Select k random nodes from base_nodes and connect each of them to the
        nodes in new_nodes with probability p.
        """
        # To keep new nodes from over-writing current ones rename the new nodes starting
        # from the last node in base
        new = nx.convert_node_labels_to_integers(new, first_label=max(base.nodes()) + 1)
        new_nodes = new.nodes()
        base_nodes = base.nodes()
        new_base = nx.compose(base, new)
        # Shuffle base nodes for random selection
        random.shuffle(base_nodes)
        # Take only the first k nodes from the shuffled base nodes. Then, with probability p,
        # connect each of those nodes to the nodes in new_nodes.
        for n in base_nodes[0:k]:
            edge_test = zip(random.uniform(size=len(new_nodes)), new_nodes)
            for d, m in edge_test:
                if (d <= p):
                    new_base.add_edge(m, n)
        # Makes graph fully connected in WS way
        while nx.number_connected_components(new_base) > 1:
            mc_nodes = nx.connected_component_subgraphs(new_base)[0].nodes()
            cc_nodes = nx.connected_component_subgraphs(new_base)[1].nodes()
            new_edges = list()
            for i in range(k):
                random.shuffle(mc_nodes)
                random.shuffle(cc_nodes)
                new_edges.append((mc_nodes[0], cc_nodes[0]))
            new_base.add_edges_from(new_edges)
        new_base.name = ""
        return new_base

    # Node ceiling of 100 nodes ensures a fully connected graph
    def node_ceiling(G):
        if G.number_of_nodes() >= 100:
            return False
        else:
            return True

    # Setup GMM
    watts_strogatz_model = gmm.gmm(test_graph)
    watts_strogatz_model.set_termination(node_ceiling)
    watts_strogatz_model.set_rule(watts_strogatz_growth)
    sim_name = "GMM(p[" + str(p) + "])_" + watts_strogatz_model.get_base().name
    gmm.algorithms.simulate(watts_strogatz_model, tau=4, poisson=True, seed=seed, new_name=sim_name)  # Run simulation
    simulated_watts_strogatz = watts_strogatz_model.get_base()
    # Print the results to stdout
    if verbose:
        print(nx.info(simulated_watts_strogatz))
        print("")
    # Return the simulated graph
    return simulated_watts_strogatz
def main(argin='./',adaptFlag = False): # output file # initialize EKF #Qkin = np.array([[20.0]])#continuous-time integration value Qkin = np.array([[20.0]])#Euler integration value Rkin = np.array([ [0.0001] ]) GMM = gmm.gmm(2,25,Qkin,Rkin,stateDerivativeGMM,stateJacobian,stateProcessInfluence,measurementFunction,measurementJacobian) dt = 0.01 tfin = 10.0 nSteps = int(tfin/dt) tsim = 0.0 muk = np.array([1.0,0.0]) Pk = np.array([[0.1,0.0],[0.0,1.0]]) xk = np.random.multivariate_normal(muk,Pk) yk = simMeasurementFunction(xk,tsim) # initial covariance GMM.init_monte(xk,Pk) ## true state xt = np.zeros((nSteps,2)) ## discretized PDF value XK = np.zeros((nSteps,2,GMM.aki.shape[1])) pk = np.zeros((nSteps,GMM.aki.shape[1])) alphai = np.zeros((nSteps,GMM.aki.shape[1])) Pki = np.zeros((nSteps,2,2,GMM.aki.shape[1])) yt = np.zeros(nSteps) tplot = np.zeros(nSteps) t1 = time.time() for k in range(nSteps): # log tplot[k] = tsim xt[k,:] = xk.copy() (XK[k,:,:],pk[k,:]) = GMM.get_pdf() alphai[k,:] = GMM.alphai.copy() Pki[k,:,:,:] = GMM.Pki.copy() # propagate filter GMM.propagate_normal(dt) # simulate y = sp.odeint(stateDerivative,xk,np.array([tsim,tsim+dt]),args=([],) ) xk = y[-1,:].copy() # update time tsim = tsim + dt # measurement ymeas = simMeasurementFunction(xk,tsim) # store measurement yt[k] = ymeas[0] # update EKF GMM.update(ymeas) print("%f,%f" % (tsim,ymeas[0])) # resample? GMM.resample() t2 = time.time() print('Completed simulation in %f seconds' % (t2-t1)) print(XK.shape,tplot.shape) # len(tplot) x Ns matrix of times tMesh = np.kron(np.ones((GMM.aki.shape[1],1)),tplot).transpose() # find the max of the PDF for the maximum liklihood estimate xml = np.zeros((nSteps,2)) Pkk = np.zeros((nSteps,2)) for k in range(nSteps): idmax = np.argmax(pk[k,:]) xml[k,:] = XK[k,:,idmax].transpose() # compute the covariance mu = np.zeros(2) for j in range(GMM.aki.shape[1]): mu = mu + alphai[k,j]*XK[k,:,j] Pxx = np.zeros((2,2)) for j in range(GMM.aki.shape[1]): Pxx = Pxx + alphai[k,j]*(Pki[k,:,:,j] + np.outer(XK[k,:,j]-mu,XK[k,:,j]-mu)) #print("%f,%f|%f,%f,%f,%f" % (mu[0],mu[1],Pxx[0,0],Pxx[0,1],Pxx[1,0],Pxx[1,1])) Pkk[k,0] = Pxx[0,0] Pkk[k,1] = Pxx[1,1] fig = plt.figure() ax = [] for k in range(4): if k < 2: nam = 'x' + str(k+1) else: nam = 'e' + str(k-1) ax.append( fig.add_subplot(2,2,k+1,ylabel=nam) ) if k < 2: if k == 0: # plot the discrete PDF as a function of time mex = tMesh.reshape((len(tplot)*GMM.aki.shape[1],)) mey = XK[:,0,:].reshape((len(tplot)*GMM.aki.shape[1],)) mez = pk.reshape((len(tplot)*GMM.aki.shape[1],)) elif k == 1: # plot the discrete PDF as a function of time mex = tMesh.reshape((len(tplot)*GMM.aki.shape[1],)) mey = XK[:,1,:].reshape((len(tplot)*GMM.aki.shape[1],)) mez = pk.reshape((len(tplot)*GMM.aki.shape[1],)) idx = mez.argsort() mexx,meyy,mezz = mex[idx],mey[idx],mez[idx] cc = ax[k].scatter(mexx,meyy,c=mezz,s=20,edgecolor='') fig.colorbar(cc,ax=ax[k]) # plot the truth ax[k].plot(tplot,xt[:,k],'b-') elif k < 4: ax[k].plot(tplot,xt[:,k-2]-xml[:,k-2],'b-') ax[k].plot(tplot,3.0*np.sqrt(Pkk[:,k-2]),'r--') ax[k].plot(tplot,-3.0*np.sqrt(Pkk[:,k-2]),'r--') ax[k].grid() fig.show() raw_input("Return to exit") print("Completed test_enky.py") return
pos = np.zeros((img.shape[0], img.shape[1], 2)) for i in range(pos.shape[0]): for j in range(pos.shape[1]): pos[i, j, :] = [i, j] pos = pos.reshape((-1, 2)) data = np.hstack((pos, np.reshape(img, (-1, 3)))) # k_means res = k_means(data, 3, iter_times=20, dist_func=dist) tag = res[:, -1] tag = np.reshape(tag, c.shape) plt.figure(2) plt.imshow(tag) plt.axis('off') plt.show() # gmm res = gmm(data, 3, iter_times=20) tag = res[:, -1] tag = np.reshape(tag, c.shape) plt.figure(3) plt.imshow(tag) plt.axis('off') plt.show() # dbscan res = dbscan(data, 10, 100, dist_func=dist) tag = res[:, -1] tag = np.reshape(tag, c.shape) plt.figure(4) plt.imshow(tag) plt.axis('off') plt.show()
def barabasi_albert_simulation(test_graph, m, seed=None, verbose=False): """ Given a set of Barabasi-Albert "preferential attachment" random graphs, simulate using above growth rule """ # Required to change the growth rule dynamically, so # we define it inside the simulation function def barabasi_albert_growth(base, new): """ Select m random nodes from new_nodes and connect each node to the nodes in base_nodes as a function of the degree of the node in base_nodes. The basic "preferential attachment" model. """ # To keep new nodes from over-writing current ones rename the new nodes starting # from the last node in base new = nx.convert_node_labels_to_integers( new, first_label=max(base.nodes()) + 1) new_nodes = new.nodes() base_nodes = base.nodes() new_base = nx.compose(base, new) # Shuffle new_nodes random.shuffle(new_nodes) # Create edge test based on degree centrality base_degree = nx.degree_centrality(base).items() # Create "preferential attachment" structure by connecting m # nodes from new structure to nodes in base as a function of # base nodes' degree centrality for i in xrange(m): edge_made = False while edge_made is False: # Randomly select a node in base and add connection # based on its degree centrality p = random.uniform() j = random.randint(0, len(base_nodes)) if p <= base_degree[j][1]: k = random.randint( len(new_nodes)) # Randomly select a new node new_base.add_edge(new_nodes[k], base_degree[j][0]) edge_made = True return (new_base) # Simple node ceiling of 1,000 nodes def node_ceiling(G): if G.number_of_nodes() >= 1000: return False else: return True # Setup GMM barabasi_albert_model = gmm.gmm(test_graph) barabasi_albert_model.set_termination(node_ceiling) barabasi_albert_model.set_rule(barabasi_albert_growth) sim_name = "GMM SIMULATION-" + barabasi_albert_model.get_base().name gmm.algorithms.simulate(barabasi_albert_model, tau=3, poisson=True, seed=seed, new_name=sim_name) # Run simulation simualted_barabasi_albert = barabasi_albert_model.get_base() # Print the results to stdout if verbose: print(nx.info(simualted_barabasi_albert)) print("") # Return a list of simulated graphs return (simualted_barabasi_albert)
def execute(lib=__library__): start = datetime.datetime.now() print "START TIME", str(start) # fetch shape of images img = images.fetch_disp("bm", 1, __library__) sh = (int(np.shape(img)[0]), int(np.shape(img)[1])) shape = (1080, 1920) # check if ground is available img = images.fetch_ground(1, __library__) ground_avail = img is not None if ground_avail: # shape = list(np.array(shape) * 2) shape = (shape[0], shape[1]) else: # print shape shape = (shape[0], shape[1] * 2) print "starting..." global __begin__ global __end__ for alg in __algs__: if __timer__: tm = timer(__begin__, __end__) mx = 0 overall_diff = np.array([], dtype=__dtype__) file_num = 0 file_count = 0 max_count = 10 found_count = 0 full_count = 0 # cv2.imshow('1', images.fetch_disp(alg, 1, __library__)) # cv2.imshow('1b', (images.fetch_ground(1, __library__).astype('float') - images.fetch_disp(alg, 1, __library__)[:,:,0].astype('float')).astype('uint8')) # cv2.imshow('2', images.fetch_disp(alg, 2, __library__)) # cv2.imshow('2b', (images.fetch_ground(2, __library__).astype('float') - images.fetch_disp(alg, 2, __library__)[:,:,0].astype('float')).astype('uint8')) cv2.waitKey() if __save_data_only__: print 'fetching data' print __library__ for i in range(__begin__, __end__ + 1): orig = images.fetch_orig(i, __library__) ground = images.fetch_ground(i, __library__) disp = images.fetch_disp(alg, i, __library__) # cv2.imshow('disp', disp) # cv2.imshow('diff', (ground.astype('float') - disp[:,:,0].astype('float')).astype('uint8')) # cv2.waitKey() if orig is None: print 'orig is None' if ground is None: print 'ground is None' if disp is None: print 'disp is None' if disp is not None and ground_avail: if len(disp.shape) == 3: disp = disp[:,:,0] diff = ((ground.astype(__dtype__)) - (disp.astype(__dtype__))).astype(__dtype__) full_count += np.product(diff.shape) # exclude not calculated flat = diff[np.where(disp != 0)] flat_ground = ground[np.where(disp != 0)] flat = flat[np.where(flat_ground != 0)] found_count += len(np.nonzero(flat.flatten())[0]) # exclude incalculable values # diff[ground > 16*6] = 0 diff = np.array(diff, dtype=__dtype__) overall_diff = np.concatenate((overall_diff, flat)) if file_count == max_count: print 'saving', '%s/data/data_%s_%d.npz' % (lib, alg, file_num), i np.savez('%s/data/data_%s_%d.npz' % (lib, alg, file_num), data=overall_diff) overall_diff = np.array([], dtype=__dtype__) file_count = 1 file_num += 1 else: file_count += 1 if __timer__: tm.progress(i) print 'found', found_count, 'of', full_count if full_count != 0: print float(found_count) / full_count * 100.0 # np.savez('%s/data/data_%s_%d.npz' % (lib, alg, file_num), data=overall_diff) if __save_data_only__: return data_files = list() total = __end__ - __begin__ + 1 # range(int(math.ceil(total / float(max_count)))) for g in __G__: print print 'calculating for G =', g for alg in __algs__: print 'Calculating for alg', alg data_files = ['%s/data/data_%s_%d.npz' % (lib, alg, i) for i in range(0, int(math.floor((__end__ - __begin__) / 10.)))] print data_files # data_files = ['%s/data/data_%s_%d.npz' % (lib, alg, i) for i in range(int(__begin__ / 10.), int(math.ceil(__end__ / 10.)))] #if __library__ == 'tsukuba': # data_files=data_files[:-1] print 'creating model' model = gmm(data_files, debug=__dbg__, history=True, timer=__timer__, limit=limit) model.fit_model(g) model.save_results(library=__library__, alg=alg, hist=True) # model.draw_results() print "END TIME ", str(datetime.datetime.now()) print "TIME PASSED", str(datetime.datetime.now() - start) print # 
pp.show()
print "done"
plt.scatter(points[:, 0], points[:, 1]) plt.xlabel('x') plt.ylabel('y') plt.title( "k-means clusters(with euclidean distance) over data{}".format(k)) print("method: kmeans - data{} - euclidean distance".format(k)) etr = Estimater() etr.get_internal_index(results, euclidean_distance) results = np.hstack((results, data[:, -1:])) etr.get_external_index(results) plt.show() # gmm for k in range(1, 4): data = np.load("data/{}.npy".format(k)) results = gmm(data[:, :-1], k=3) plt.subplot(1, 3, k) for i in range(10): points = results[results[:, -1] == i] plt.scatter(points[:, 0], points[:, 1]) plt.xlabel('x') plt.ylabel('y') plt.title("gmm clusters over data{}".format(k)) print("method: gmm - data{}".format(k)) etr = Estimater() etr.get_internal_index(results, euclidean_distance) results = np.hstack((results, data[:, -1:])) etr.get_external_index(results) plt.show() # dbscan
            return self.base
        else:
            return self.rule(self.base,new)

    def am_gmm(self):
        """Simple function to test if object is a gmm"""
        return True

if __name__ == '__main__':
    # Create most basic GMM object with five node cycle graph as base.
    import gmm
    G=nx.cycle_graph(5)
    model=gmm.gmm(G)

    # Using five node cycle graph, create gmm object with node ceiling
    # termination rule of 100 nodes
    def degree_ceiling(G):
        if G.number_of_nodes()>=100:
            return True
        else:
            return False

    model=gmm.gmm(G,degree_ceiling)

    # Finally, add a simple random growth rule
    def rand_add(base, new):
def setUp(self):
    # Most basic GMM; no growth or termination rules.
    self.base_model = gmm.gmm(self.five_cycle)
    self.base_directed = gmm.gmm(copy.deepcopy(self.five_cycle).to_directed())
def gmm_demo():
    df = load_data()
    df = extract_features(df)
    features_list = list(df.columns.values)[1:]
    gmm(df, features_list)
ib = []
for row in data:
    Ib = eval(row[6])
    ib.append(Ib)
ib = np.array(ib)

def ibase():
    return ib

C = 3
(mu_est, sigma_est, p_est, counter, difference) = gmm(ib,C,1e-5)
print('___ Ibase Parameters ___')
print('------Means--------')
print('mu_1=%1.4f'%mu_est[0])
print('mu_2=%1.4f'%mu_est[1])
print('mu_3=%1.4f'%mu_est[2])
print('------Variance--------')
print('sigma_1=%1.4f'%sigma_est[0])
print('sigma_2=%1.4f'%sigma_est[1])
print('sigma_3=%1.4f'%sigma_est[2])
print('------Weights-------')
print('W_1=%1.4f'%p_est[0])
print('W_2=%1.4f'%p_est[1])
if "xy" in opts.color and opts.method != "watershed": temp = img.copy() img = np.zeros((img.shape[0], img.shape[1], img.shape[2] + 2)) img[:, :, 0:temp.shape[2]] = temp img[:, :, temp.shape[2]] = np.array(range(img.shape[0])).reshape( img.shape[0], 1) img[:, :, temp.shape[2] + 1] = np.array(range(img.shape[1])).reshape( 1, img.shape[1]) # execute the requested clustering method if "watershed" in opts.method: clustering = watershed(img, opts.k) if "kmeans" in opts.method: clustering = kmeans(img, opts.k) if "gmm" in opts.method: clustering = gmm(img, opts.k) if "hierarchical" in opts.method: clustering = hierarchical(img, opts.k) # read the truth truth = sio.loadmat(opts.img_file.replace('jpg', 'mat')) truth = truth['groundTruth'][0, 4][0][0]['Segmentation'] # plot and save the results with a nice title title = opts.img_file.split("/")[-1].replace(".jpg", "") + "_k=" + str( opts.k) + "_" + opts.method showSaveResults(imgoriginal, clustering, truth, title) # calculate and report mutual information print("Mutual information (more is better): " + str(mutual_info_score(truth.flatten(), clustering.flatten()))) print(
if __name__ == '__main__':
    from pandas import read_csv
    import gmm

    #Load datasets using pandas interface
    data1 = read_csv("BankNoteAuthentication.csv")
    data2 = read_csv("WineQuality-WhiteWine.csv")
    data1.dropna(axis="columns", how="any", inplace=True)
    data2.dropna(axis="columns", how="any", inplace=True)

    #Data set 1 clustered
    trainee1 = data1[["skewness", "curtosis"]]
    model1 = gmm.gmm(clusters=5, iter=25, randSeed=42)
    normalized1 = model1.normalizeSet(trainee1)
    model1.trainModel(normalized1)
    model1.draw(normalized1, model1.u, model1.sig, xAxis=trainee1.columns.values[0], yAxis=trainee1.columns.values[1])

    #Data set 2 clustered
    trainee2 = data2[["total sulfur dioxide", "chlorides"]]
    model2 = gmm.gmm(clusters=2, iter=50, randSeed=42)
    normalized2 = model2.normalizeSet(trainee2)
def __init__(self):
    self.gmm = gmm()
def main(): # Ensure all data has been downloaded and processed #utils.download_trips_dataset() #for y in utils.YEARS: # utils.load_trips_dataframe(y) # process_trips(trips_df) np.random.seed(1) rc('font', family='serif') station_info = utils.load_station_info() start_time_matrix, station_idx, time_idx, time_at_idx = utils.load_start_time_matrix( ) stop_time_matrix, _, _ = utils.load_stop_time_matrix() start_time_matrix = start_time_matrix.astype(np.int16) stop_time_matrix = stop_time_matrix.astype(np.int16) inverse_station = {v: k for k, v in station_idx.items()} flow_matrix = stop_time_matrix - start_time_matrix print("Total data points (excluding inactive stations): {:,}".format( np.sum(utils.construct_active_stations_by_bucket(start_time_matrix)))) # Figure 1 Total Volume plot_total_start_trips(start_time_matrix, time_idx) # Figure 2 Avg Week select_stations = [195, 511] plot_avg_week_for_stations(start_time_matrix, station_idx, station_info, select_stations, "Number of trips started at station over week", "avg_week_start_time.pdf") plot_avg_week_for_stations(stop_time_matrix, station_idx, station_info, select_stations, "Number of trips stopped at station over week", "avg_week_stop_time.pdf") plot_avg_week_for_stations( flow_matrix, station_idx, station_info, select_stations, #360, 195, 146, 432, 161, 497, 517], "Net change in bikes at station over week", "avg_week_flow_time.pdf", "Number of Arriving Trips") # # Cluster stations print("Clustering stations") avg_weekly_flow = utils.get_station_agg_trips_over_week( flow_matrix, np.mean) cluster_assignments, means, ppc = gmm.gmm(avg_weekly_flow, K=3, posterior_predictive_check=True) # Figure 4 Cluster Means # Plot weekly graph for mean of each cluster plot_cluster_means(means, time_at_idx) # Figure 6 Station Map # Plot clustered stations on map plot_map(cluster_assignments, station_info, station_idx, inverse_station) # Figure 7 Posterior Predictive Check # Plot the posterior predictive check random_indices = randint(0, flow_matrix.shape[0], size=150) plot_posterior_predictive_check( flow_matrix[random_indices], ppc[0][random_indices], "Posterior Predictive Check on average flow", "post_pred_check.pdf") # Predictions # Figure 8 Accuracy plot_average_predictor_error(start_time_matrix, flow_matrix) plot_seasonal_average_predictor_error(start_time_matrix, flow_matrix) plot_cluster_predictor_error(start_time_matrix, flow_matrix, cluster_assignments, means) # Figure 9 Seasonal Prediction plot_predicted_total_start_trips(start_time_matrix) # Figure 10 Sample across three seasons plot_cluster_predictor_sample_error(start_time_matrix, flow_matrix, cluster_assignments, means) # Figures not used in the paper # Some interesting stations: 3412, 3324, 3285, 3286, 3153, 360, 195, 2023, 3095, 432, 511, 438 plot_avg_week_for_stations( flow_matrix, station_idx, station_info, [360, 195, 497, 146, 161], "Net change in bikes at station over week (normalized)", "normalized_avg_week_flow_time.pdf", normalize=True) plot_avg_week_for_stations( flow_matrix, station_idx, station_info, [360, 195, 497, 146, 161], "Net change in bikes at station over week (normalized, rounded)", "normalized_round_avg_week_flow_time.pdf", normalize=True, round=True) plot_avg_week_for_stations( flow_matrix, station_idx, station_info, None, "Net change in bikes at station over week (normalized)", "normalized_all_avg_week_flow_time.pdf", normalize=True) plot_avg_week_for_stations(flow_matrix, station_idx, station_info, None, "Net change in bikes at station over week ", 
"all_avg_week_flow_time.pdf") plot_avg_week_for_stations( flow_matrix, station_idx, station_info, None, "Net change in bikes at station over week (normalized, rounded)", "normalized_round_all_avg_week_flow_time.pdf", normalize=True, round=True) # Plot clustered stations in 2d plot_clustered_stations_2d(avg_weekly_flow, cluster_assignments, means, inverse_station) # Predictions plot_predicted_flow_baseline(flow_matrix)
data, classes, labels = readData(os.path.join('..','data','wine.data'))

lda = dimred.LDA(data, classes, labels, center=True, scale=True)
projData = lda.transform(data)
x = projData[0,:]
y = projData[1,:]

points = splitClasses(data,classes,labels)
gaussians = [density.Gaussian(data=p) for p in points]
means = np.array([g.mean() for g in gaussians]).T
covs = np.array([g.cov() for g in gaussians])
nums = np.array([ p.shape[1] for p in points ])
weights,nll = gmm.calcresps(data, nums, means, covs, hard=hard)
nums, means, covs, nll = gmm.gmm(data, weights, K=3, hard=hard, diagcov=False)
numsd,meansd,covsd,nlld = gmm.gmm(data, weights, K=3, hard=hard, diagcov=True)

print 'Question 2'
print 'NLL for diagcov=False: ', nll
print 'NLL for diagcov=True: ', nlld
print 'The diagcov=False seems to be the better choice.'
print 'Because we have enough data per class we can use a full estimate '
print 'of the covariance matrix without over-fitting. '
print 'These extra parameters allow us to make a better model which is '
print 'evident in the difference in negative log-likelihood (NLL).\n'

weights,nll = gmm.calcresps(data, nums, means, covs, hard=hard)
pl.figure()
labeled_images = preprocess.load_preprocessed(path + "/Processed", FEATURES_FILE)
labeled_images_seg = preprocess.load_preprocessed(seg_path, FEATURES_SEG_FILE)

cluster_counts = accuracy_rate.counts_per_cluster(labeled_images)
cluster_counts_seg = accuracy_rate.counts_per_cluster(labeled_images_seg)

img_features = pca.pca_features(FEATURES_FILE)
features_df = pd.DataFrame(img_features)
labeled_features = labeled_images.join(features_df)

img_features_seg = pca.pca_features(FEATURES_SEG_FILE)
features_seg_df = pd.DataFrame(img_features_seg)
labeled_features_seg = labeled_images_seg.join(features_seg_df)

gmm_labels, gmm_score = gmm.gmm(img_features)
gmm_labels_seg, gmm_score_seg = gmm.gmm(img_features_seg)
kmeans_labels, kmeans_score = kmeans.kmeans(img_features)
kmeans_labels_seg, kmeans_score_seg = kmeans.kmeans(img_features_seg)

labeled = accuracy_rate.assigned_images(labeled_images, gmm_labels, kmeans_labels, 0)
accuracy_rate.output_files(labeled, 0)
voted_labels = accuracy_rate.match_labels(labeled, cluster_counts, 0)

labeled_seg = accuracy_rate.assigned_images(labeled_images_seg, gmm_labels_seg, kmeans_labels_seg, SEG_NUM)
accuracy_rate.output_files(labeled_seg, SEG_NUM)
voted_labels_seg = accuracy_rate.match_labels(labeled_seg, cluster_counts_seg, SEG_NUM)

gmm_rate, kmeans_rate = get_accuracy_rate(labeled, voted_labels)
gmm_rate_seg, kmeans_rate_seg = get_accuracy_rate(labeled_seg, voted_labels_seg)