Example #1
def main():
    
    ### Undirected graph ###
    
    # Initialize model using the Petersen graph
    model=gmm.gmm(nx.petersen_graph())
    old_graph=model.get_base()
    model.set_termination(node_ceiling)
    model.set_rule(rand_add)
    
    # Run simulation with tau=4 and Poisson density for motifs
    gmm.algorithms.simulate(model,4)   

    # View results
    new_graph=model.get_base()
    print(nx.info(new_graph))
    
    # Draw graphs
    old_pos=nx.spring_layout(old_graph)
    new_pos=nx.spring_layout(new_graph,iterations=2000)
    fig1=plt.figure(figsize=(15,7))
    fig1.add_subplot(121)
    #fig1.text(0.1,0.9,"Base Graph")
    nx.draw(old_graph,pos=old_pos,node_size=25,with_labels=False)
    fig1.add_subplot(122)
    #fig1.text(0.1,0.45,"Simulation Results")
    nx.draw(new_graph,pos=new_pos,node_size=20,with_labels=False)
    fig1.savefig("undirected_model.png")
    
    ### Directed graph ###
    
    # Initialize model using a directed Barabasi-Albert random graph
    directed_base=nx.barabasi_albert_graph(25,2).to_directed()
    directed_model=gmm.gmm(directed_base)
    directed_model.set_termination(node_ceiling)
    directed_model.set_rule(rand_add)
    
    # Run simulation with tau=4 and Poisson density for motifs
    gmm.algorithms.simulate(directed_model,4)
    
    # View results
    new_directed=directed_model.get_base()
    print(nx.info(new_directed))
    
    # Draw directed graphs
    old_dir_pos=nx.spring_layout(directed_base)
    new_dir_pos=nx.spring_layout(new_directed,iterations=2000)
    fig2=plt.figure(figsize=(7,10))
    fig2.add_subplot(211)
    fig2.text(0.1,0.9,"Base Directed Graph")
    nx.draw(directed_base,pos=old_dir_pos,node_size=25,with_labels=False)
    fig2.add_subplot(212)
    fig2.text(0.1,0.45, "Simualtion Results")
    nx.draw(new_directed,pos=new_dir_pos,node_size=20,with_labels=False)
    fig2.savefig("directed_model.png")
    
    # Export files
    nx.write_graphml(model.get_base(), "base_model.graphml")
    nx.write_graphml(directed_model.get_base(), "directed_model.graphml")
    nx.write_graphml(nx.petersen_graph(), "petersen_graph.graphml")
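Example #1 calls model.set_termination(node_ceiling) and model.set_rule(rand_add) without defining those helpers. A minimal sketch of what they might look like, adapted from the definitions that appear in Example #15 (the 100-node ceiling is an assumption):

import networkx as nx
from numpy.random import randint

def node_ceiling(G):
    # Assumed termination rule: stop once the graph reaches 100 nodes
    return G.number_of_nodes() >= 100

def rand_add(base, new):
    # Assumed growth rule: attach one random new node to one random base node
    new = nx.convert_node_labels_to_integers(new, first_label=max(base.nodes()) + 1)
    new_base = nx.compose(base, new)
    base_connector = randint(base.number_of_nodes())
    new_connector = randint(min(new.nodes()), max(new.nodes()))
    new_base.add_edge(base_connector, new_connector)
    return new_base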
Example #2
def gmm_test(dt, tf, mux0, P0, YK, Qk, Rk, Nsu=20, flag_informative=True):
    global nameBit

    # Select the measurement function and Jacobian depending on whether the
    # measurement is informative; the nameBit branches below allow swapping
    # the propagation model if needed
    if not flag_informative:
        measure_argument = measurement_uninformative
        measure_jacobian = measurement_jac_uninformative
    else:
        measure_argument = measurement_enkf
        measure_jacobian = measurement_jac_gmm
    # Note: all three nameBit cases currently construct the same filter
    if nameBit == 1:
        # create EnKF object
        GMM = gmm.gmm(2, Nsu, Qk, Rk, eqom_gmm, jac_gmm, process_influence, measure_argument, measure_jacobian)
    elif nameBit == 2:
        # create EnKF object
        GMM = gmm.gmm(2, Nsu, Qk, Rk, eqom_gmm, jac_gmm, process_influence, measure_argument, measure_jacobian)
    elif nameBit == 3:
        # create EnKF object
        GMM = gmm.gmm(2, Nsu, Qk, Rk, eqom_gmm, jac_gmm, process_influence, measure_argument, measure_jacobian)

    nSteps = int(tf / dt) + 1
    ts = 0.0

    # initialize EnKF
    GMM.init_monte(mux0, P0, ts)

    xml = np.zeros((nSteps, 2))
    pdf = np.zeros((nSteps, GMM.aki.shape[1]))
    pdfPts = np.zeros((nSteps, 2, GMM.aki.shape[1]))
    alphai = np.zeros((nSteps, GMM.aki.shape[1]))
    Pki = np.zeros((nSteps, 2, 2, GMM.aki.shape[1]))
    tk = np.arange(0.0, tf, dt)

    t1 = time.time()
    fig = []
    for k in range(0, nSteps):
        if k > 0:
            # get the new measurement
            ym = np.array([YK[k]])
            ts = ts + dt
            # sync the EnKF, with continuous-time integration
            print("Propagate to t = %f" % (ts))
            # propagate filter
            GMM.propagate_normal(dt)
            GMM.update(ym)
            # log
        alphai[k, :] = GMM.alphai.copy()
        xml[k, :] = GMM.get_max_likelihood()
        Pki[k, :, :, :] = GMM.Pki.copy()
        (pdfPts[k, :, :], pdf[k, :]) = GMM.get_pdf()
        if k > 0:
            GMM.resample()

    t2 = time.time()
    print("Elapsed time: %f sec" % (t2 - t1))

    return (xml, pdf, pdfPts, alphai, Pki)
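A hypothetical driver for gmm_test, with the noise and initial-condition values borrowed from Example #19; YK here is only a placeholder measurement sequence, and the module-level functions (eqom_gmm, measurement_enkf, and so on) are assumed to be defined:

import numpy as np

nameBit = 1                                # the three nameBit cases are currently identical
dt, tf = 0.01, 10.0
mux0 = np.array([1.0, 0.0])                # initial state mean (values from Example #19)
P0 = np.array([[0.1, 0.0], [0.0, 1.0]])    # initial covariance
Qk = np.array([[20.0]])                    # process noise
Rk = np.array([[0.0001]])                  # measurement noise
YK = np.zeros(int(tf / dt) + 1)            # placeholder measurements, one per step
xml, pdf, pdfPts, alphai, Pki = gmm_test(dt, tf, mux0, P0, YK, Qk, Rk, Nsu=20)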
Example #3
def plot_gmm(lib=__library__, alg=__algs__[0], G=1, draw=True, show=True, draw_hist=True, save=False, xlim=None, print_result=True, training=True):
#    part = 'training' if training else 'testing'
#    if G == 0:
#        model = gmm(texts = True, gmm_txt='%s/%s/gmm_%s_%d.npz' % (lib, part, alg, G+1), hist_txt=('%s/%s/hist_%s_%d.npz' % (lib, part, alg, G+1)))
#    else:
#        model = gmm(texts = True, gmm_txt='%s/%s/gmm_%s_%d.npz' % (lib, part, alg, G), hist_txt=('%s/%s/hist_%s_%d.npz' % (lib, part, alg, G)))

    if G == 0:
        model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G+1), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G+1)))
    else:
        model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G)))
    x = model.bins
    bin_width = x[1] - x[0]
    x = np.linspace(model.bins[0], model.bins[-1], num=10000)

    y = None        
    if G != 0:
        y = [model.pdf_integral(xi - bin_width/2., xi + bin_width/2.) for xi in x]
    
    abc = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    G_code = abc[G]
    if G != 0:
        leg = ['Error hist', 'GMM'][::-1]
    else:
        leg = ['Error hist']
    if save:
        leg = ['\ss{%s}' % l for l in leg]
#         if G == 0: leg = None
#        images.save_latex(x, y, leg, '%s_%s_gmm_%s_%s' % (lib, part, alg, G_code), 'Error', 'Relative weight', 0.48, xlim=xlim, model=model, ylim=None)
        images.save_latex(x, y, leg, '%s_tmp_gmm_%s_%s' % (lib, alg, G_code), 'Error', 'Relative weight', 0.48, xlim=xlim, model=model, ylim=None)
    if draw:
        if draw_hist:
            pp.figure()
            pp.bar(model.bins[:-1], model.count)
            pp.hold(True)
        if xlim is not None:
            pp.xlim(xlim)
        
        # pp.stem(x, y, markerfmt='xr', linefmt='r-')
        if G != 0:
            pp.plot(x, y, color='r', linewidth=3)

#         if draw_hist:
#             model.draw_hist(fig=True, limit_hist=False)
#         pp.hold()
#         pp.title("%s - %d gmm" % (alg, G))
#         model.draw_gmm_hist(fig=False)
#         
#         if draw_hist:
#             pp.legend(['Histogram', 'GMM'])
            
    print lib, alg, G
    if print_result: model.print_results()
    if show:
        pp.show()
def plot_gmm(lib, alg=__algs__[0], G=1, draw=True, show=True, draw_hist=True, save=False, xlim=None, print_result=True):
    if G == 0:
        model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G+1), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G+1)))
    else:
        model = gmm(texts = True, gmm_txt='%s/gmm_%s_%d.npz' % (lib, alg, G), hist_txt=('%s/hist_%s_%d.npz' % (lib, alg, G)))
    x = model.bins
    
    model.count[range(np.where(x == -5)[0][0], np.where(x == 5)[0][0]+1)] = 0
    bin_width = x[1] - x[0]
    x = np.linspace(model.bins[0], model.bins[-1], num=10000)

    y = None        
    if G != 0:
        y = [model.pdf_integral(xi - bin_width/2., xi + bin_width/2.) for xi in x]
    
    abc = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    G_code = abc[G]
    if G != 0:
        leg = ['Error hist', 'GMM'][::-1]
    else:
        leg = ['Error hist']
    if save:
        leg = ['\ss{%s}' % l for l in leg]
#         if G == 0: leg = None
        images.save_latex(x, y, leg, '%s_gmm_%s_%s_outliers' % (lib, alg, G_code), 'Error', 'Relative weight', 0.48, xlim=xlim, model=model, ylim=None)
    if draw:
        if draw_hist:
            pp.figure()
            pp.bar(model.bins[:-1], model.count)
            pp.hold(True)
        if xlim is not None:
            pp.xlim(xlim)
        
        # pp.stem(x, y, markerfmt='xr', linefmt='r-')
        if G != 0:
            pp.plot(x, y, color='r', linewidth=3)

#         if draw_hist:
#             model.draw_hist(fig=True, limit_hist=False)
#         pp.hold()
#         pp.title("%s - %d gmm" % (alg, G))
#         model.draw_gmm_hist(fig=False)
#         
#         if draw_hist:
#             pp.legend(['Histogram', 'GMM'])
            
    print lib, alg, G
    if print_result: model.print_results()
    if show:
        pp.show()
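Both plot_gmm variants rely on pp.hold(True), which was deprecated in Matplotlib 2.0 and removed in 3.0. On current Matplotlib the overlay works without it; a minimal sketch of the drawing step under that assumption:

import matplotlib.pyplot as pp

pp.figure()
pp.bar(model.bins[:-1], model.count)     # histogram bars
pp.plot(x, y, color='r', linewidth=3)    # GMM curve lands on the same axes; no hold() needed
pp.show()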
Example #5
def main():
    # Load Zachary data, randomly delete nodes, and report
    zachary=nx.Graph(nx.read_pajek("karate.net")) # Do not want graph in default MultiGraph format
    zachary.name="Original Zachary Data"
    print(nx.info(zachary))
    zachary_subset=rand_delete(zachary, 15) # Remove half of the structure
    zachary_subset.name="Randomly Deleted Zachary Data"
    print(nx.info(zachary_subset))
    
    # Create model, and simulate
    zachary_model=gmm.gmm(zachary_subset,R=karate_rule,T=node_ceiling_34)
    gmm.algorithms.simulate(zachary_model,4,poisson=False,new_name="Simulation from sample")  # Use tau=4 because data is so small (it's fun!)
    
    # Report and visualize
    print(nx.info(zachary_model.get_base()))
    fig=plt.figure(figsize=(30,10))
    fig.add_subplot(131)
    nx.draw_spring(zachary,with_labels=False,node_size=45,iterations=5000)
    plt.text(0.01,-0.1,"Original Karate Club",color="darkblue",size=20)
    fig.add_subplot(132)
    nx.draw_spring(zachary_subset,with_labels=False,node_size=45,iterations=5000)
    plt.text(0.01,-0.1,"Random sample of Karate Club",color="darkblue",size=20)
    fig.add_subplot(133)
    nx.draw_spring(zachary_model.get_base(),with_labels=False,node_size=45,iterations=5000)
    plt.text(0.01,-0.1,"Simulation from random sample",color="darkblue",size=20)
    plt.savefig("zachary_simulation.png")
Example #6
    def dotest(k, max_iter):
        
        print('Model Number is ' + str(k))
        input("Press Enter to Continue: ")        
        for i in range(n_classes):
            train = trains[i]
            d = np.shape(train)[1]
            dparams = [None]*d
            for j in range(d):
                dparams[j] = gmm.gmm(k,[x[j] for x in train], max_iter)
            paramses[i] = dparams
        for i, params in enumerate(paramses):
            print("Class " + str(i))
            for j, param in enumerate(params):
                print("    Dimension " + str(j))
                for l, m in enumerate(param):
                    print('            Model ' + str(l))
                    print(m)

        
        input('Params calculated, press Enter to compute the true rate: ')
        testlen = len(testA) + len(testB)    
        rate1 = mylib.statTrueRate(testA, paramses, classifier,  0 ) / testlen
        rate2 = mylib.statTrueRate(testB, paramses, classifier,  1 ) / testlen
        print(rate1 + rate2)
Example #7
def doit(dic,priors,classes,K,diag):
    err = {'train':list(), 'test':list()}
    for k in K:
        print '*'*15,'K =',str(k),'*'*15
        nums, means, covs, nll  = {},{},{},{}
        # Build GMM models
        for dif in dic['train']:
            data = pack(dic['train'][dif])
            # Restart EM six times and keep the parameters with the lowest NLL
            for i in xrange(6):
                _nums,_means,_covs,_nll = gmm.gmm(data, weights=None, K=k, hard=True, diagcov=diag)
                if(i != 0):
                    if(_nll > nll[dif]):
                        continue
                nums[dif],means[dif],covs[dif],nll[dif] =  _nums,_means,_covs,_nll
        
        criteria = [snll for dif in dic['train']]
        kwparams = [{'nums':nums[dif], 'means':means[dif], 'covs':covs[dif], 'prior':priors[dif]} for dif in dic['train']]
        
        # Evaluate
        for x in dic:
            labels, labels_est = [], []
            for dif in dic[x]:
                points = dic[x][dif]
                labels += [dif for i in xrange(len(points))]
                labels_est += optcriterion(points, classes, criteria, kwparams=kwparams, _max=False);
            e = 100.0*sum( np.array(labels) != np.array(labels_est) ) / len(labels)
            err[x].append( e )
            
            print 'Confusion matrix for' , x , 'data','(K={:},diagcov={:})'.format(k,diag)
            utils.confusion(labels, labels_est, True)
            print '% Error: ', e,'\n'
    if(len(K) > 1):
        pl.plot(K,err['train'],'--', label= 'Train'+(' (diagcov=True)' if diag else ''))
        pl.plot(K,err['test'], label= 'Test'+(' (diagcov=True)' if diag else ''))
Example #8
def doit(dic, priors, classes, K, diag):
    err = {'train': list(), 'test': list()}
    for k in K:
        print '*' * 15, 'K =', str(k), '*' * 15
        nums, means, covs, nll = {}, {}, {}, {}
        # Build GMM models
        for dif in dic['train']:
            data = pack(dic['train'][dif])
            # Restart EM six times and keep the parameters with the lowest NLL
            for i in xrange(6):
                _nums, _means, _covs, _nll = gmm.gmm(data,
                                                     weights=None,
                                                     K=k,
                                                     hard=True,
                                                     diagcov=diag)
                if (i != 0):
                    if (_nll > nll[dif]):
                        continue
                nums[dif], means[dif], covs[dif], nll[
                    dif] = _nums, _means, _covs, _nll

        criteria = [snll for dif in dic['train']]
        kwparams = [{
            'nums': nums[dif],
            'means': means[dif],
            'covs': covs[dif],
            'prior': priors[dif]
        } for dif in dic['train']]

        # Evaluate
        for x in dic:
            labels, labels_est = [], []
            for dif in dic[x]:
                points = dic[x][dif]
                labels += [dif for i in xrange(len(points))]
                labels_est += optcriterion(points,
                                           classes,
                                           criteria,
                                           kwparams=kwparams,
                                           _max=False)
            e = 100.0 * sum(
                np.array(labels) != np.array(labels_est)) / len(labels)
            err[x].append(e)

            print 'Confusion matrix for', x, 'data', '(K={:},diagcov={:})'.format(
                k, diag)
            utils.confusion(labels, labels_est, True)
            print '% Error: ', e, '\n'
    if (len(K) > 1):
        pl.plot(K,
                err['train'],
                '--',
                label='Train' + (' (diagcov=True)' if diag else ''))
        pl.plot(K,
                err['test'],
                label='Test' + (' (diagcov=True)' if diag else ''))
Example #9
def train_model():
    """ Read all MFCC files at ./MFCC/ and use the coefficients to train
    models for each user. The model used is a Gaussian Mixture Model
    """

    models = []
    names = []
    for files in os.listdir(mfcc_path):
        print 'Training ' + files.split('.')[0] + "'s model"
        mfcc = np.loadtxt(mfcc_path + files)
        models.append(gmm.gmm(mfcc))
        names.append(files.split('.')[0])

    return models, names
Example #11
	def __init__(self, N, K):
		self.N = N
		self.K = K
		self.gmm = []
		for i in range(N):
			self.gmm.append(G.gmm(K, name='Class'+str(i)))
		self.con_mat = np.array([])
		self.recall = []
		self.meanrecall = 0.0
		self.precision = []
		self.meanprecision = 0.0
		self.fmeasure = []
		self.meanfmeasure = 0.0
		self.accuracy = 0.0
Example #12
def main():
    # Load Zachary data, randomly delete nodes, and report
    zachary = nx.Graph(nx.read_pajek(
        "karate.net"))  # Do not want graph in default MultiGraph format
    zachary.name = "Original Zachary Data"
    print(nx.info(zachary))
    zachary_subset = rand_delete(zachary, 15)  # Remove half of the structure
    zachary_subset.name = "Randomly Deleted Zachary Data"
    print(nx.info(zachary_subset))

    # Create model, and simulate
    zachary_model = gmm.gmm(zachary_subset, R=karate_rule, T=node_ceiling_34)
    gmm.algorithms.simulate(zachary_model,
                            4,
                            poisson=False,
                            new_name="Simulation from sample"
                            )  # Use tau=4 because data is so small (it's fun!)

    # Report and visualize
    print(nx.info(zachary_model.get_base()))
    fig = plt.figure(figsize=(30, 10))
    fig.add_subplot(131)
    nx.draw_spring(zachary, with_labels=False, node_size=45, iterations=5000)
    plt.text(0.01, -0.1, "Original Karate Club", color="darkblue", size=20)
    fig.add_subplot(132)
    nx.draw_spring(zachary_subset,
                   with_labels=False,
                   node_size=45,
                   iterations=5000)
    plt.text(0.01,
             -0.1,
             "Random sample of Karate Club",
             color="darkblue",
             size=20)
    fig.add_subplot(133)
    nx.draw_spring(zachary_model.get_base(),
                   with_labels=False,
                   node_size=45,
                   iterations=5000)
    plt.text(0.01,
             -0.1,
             "Simulation from random sample",
             color="darkblue",
             size=20)
    plt.savefig("zachary_simulation.png")
Example #13
def binomial_simulation(graph_set, seed=None, verbose=False):
    """
    Given a set of binomial random graphs, simulate using above growth rule
    """

    simulated_graphs = list()

    # Iterate over graph set to produce several simulations
    for i in xrange(0, len(graph_set) - 1):
        # Required to change the growth rule dynamically,
        # so we define it inside the simulation's for-loop
        termination_size = graph_set[i + 1].number_of_nodes()

        def node_ceiling(G):
            if G.number_of_nodes() >= termination_size:
                return False
            else:
                return True

        # Setup GMM
        erdos_renyi_model = gmm.gmm(graph_set[i])
        erdos_renyi_model.set_termination(node_ceiling)
        erdos_renyi_model.set_rule(binomial_growth)
        sim_name = "GMM SIMULATION-" + erdos_renyi_model.get_base().name
        gmm.algorithms.simulate(erdos_renyi_model,
                                tau=3,
                                poisson=True,
                                seed=seed,
                                new_name=sim_name)

        # Retrieve the simulated graph
        simulated_erdos_renyi = erdos_renyi_model.get_base()

        # Print the results to stdout
        if verbose:
            print(nx.info(graph_set[i + 1]))
            print(nx.info(simulated_erdos_renyi))
            print("")
        simulated_graphs.append(simulated_erdos_renyi)

    # Return a list of simulated graphs
    return (simulated_graphs)
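binomial_growth is referenced but not shown. A hedged sketch of an Erdos-Renyi-style growth rule, following the two-argument rule structure of watts_strogatz_growth in Example #18 (the edge probability p is an assumption; the default keeps the rule callable with two arguments):

import networkx as nx
from numpy import random

def binomial_growth(base, new, p=0.1):
    # Rename new nodes past the base labels, then connect each (base, new)
    # node pair independently with probability p, G(n, p)-style
    new = nx.convert_node_labels_to_integers(new, first_label=max(base.nodes()) + 1)
    new_base = nx.compose(base, new)
    for n in base.nodes():
        for m in new.nodes():
            if random.uniform() <= p:
                new_base.add_edge(n, m)
    return new_base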
Example #14
def binomial_simulation(graph_set, seed=None, verbose=False):
    """
    Given a set of binomial random graphs, simulate using above growth rule
    """
    
    simulated_graphs=list()
    
    # Iterate over graph set to produce several simulations
    for i in xrange(0,len(graph_set)-1):
        # Required to change the growth rule dynamically, 
        # so we define it inside the simulation's for-loop
        termination_size=graph_set[i+1].number_of_nodes()
        
        def node_ceiling(G):
            if G.number_of_nodes()>=termination_size:
                return False
            else:
                return True
        
        # Setup GMM 
        erdos_renyi_model=gmm.gmm(graph_set[i])
        erdos_renyi_model.set_termination(node_ceiling)
        erdos_renyi_model.set_rule(binomial_growth)
        sim_name="GMM SIMULATION-"+erdos_renyi_model.get_base().name
        gmm.algorithms.simulate(erdos_renyi_model,tau=3, poisson=True, seed=seed, new_name=sim_name)
        
        # Retrieve the simulated graph
        simulated_erdos_renyi=erdos_renyi_model.get_base()
        
        # Print the results to stdout
        if verbose:
            print(nx.info(graph_set[i+1]))
            print(nx.info(simulated_erdos_renyi))
            print("")
        simulated_graphs.append(simulated_erdos_renyi)
    
    # Return a list of simulated graphs
    return(simulated_graphs)
Example #15
File: test_gmm.py Project: h0cked/GMM
    def setUp(self):
        """Tests that all models from setup pass inspection."""

        # Node ceiling termination rule for 100 nodes
        def node_ceiling(G):
            if G.number_of_nodes() >= 100:
                return True
            else:
                return False

        # Simple random growth rule: connects random node from base to random node from new
        def rand_add(base, new):
            from numpy.random import randint

            new = nx.convert_node_labels_to_integers(new, first_label=max(base.nodes()) + 1)
            new_base = nx.compose(base, new)
            base_connector = randint(base.number_of_nodes())
            new_connector = randint(min(new.nodes()), max(new.nodes()))
            new_base.add_edge(base_connector, new_connector)
            return new_base

        # This model should pass without exception
        self.full_model = gmm.gmm(self.five_cycle, T=node_ceiling, R=rand_add)
Example #16
    def setUp(self):
        """Tests that all models from setup pass inspection."""

        # Node ceiling termination rule for 100 nodes
        def node_ceiling(G):
            if G.number_of_nodes() >= 100:
                return True
            else:
                return False

        # Simple random growth rule: connects random node from base to random node from new
        def rand_add(base, new):
            from numpy.random import randint
            new = nx.convert_node_labels_to_integers(
                new, first_label=max(base.nodes()) + 1)
            new_base = nx.compose(base, new)
            base_connector = randint(base.number_of_nodes())
            new_connector = randint(min(new.nodes()), max(new.nodes()))
            new_base.add_edge(base_connector, new_connector)
            return new_base

        # This model should pass without exception
        self.full_model = gmm.gmm(self.five_cycle, T=node_ceiling, R=rand_add)
Example #17
    def dotest(k, max_iter):

        print('Model Number is ' + str(k))
        input("Press Enter to Continue: ")
        for i in range(n_classes):
            train = trains[i]
            d = np.shape(train)[1]
            dparams = [None] * d
            for j in range(d):
                dparams[j] = gmm.gmm(k, [x[j] for x in train], max_iter)
            paramses[i] = dparams
        for i, params in enumerate(paramses):
            print("Class " + str(i))
            for j, param in enumerate(params):
                print("    Dimension " + str(j))
                for l, m in enumerate(param):
                    print('            Model ' + str(l))
                    print(m)

        input('Params calculated, press Enter to compute the true rate: ')
        testlen = len(testA) + len(testB)
        rate1 = mylib.statTrueRate(testA, paramses, classifier, 0) / testlen
        rate2 = mylib.statTrueRate(testB, paramses, classifier, 1) / testlen
        print(rate1 + rate2)
Example #18
def watts_strogatz_simulation(test_graph, k, p, seed=None, verbose=False):
    """
    Given a set of Watts-Strogatz "small-world" random graphs, simulate using above growth rule
    """

    # Required to change the growth rule dynamically, so
    # we define it inside the simulation function
    def watts_strogatz_growth(base, new):
        """
        Select k random nodes from new_nodes and connect each node to the nodes in base_nodes
        with probability p.  
        """

        # To keep new nodes from over-writing current ones rename the new nodes starting
        # from the last node in base
        new = nx.convert_node_labels_to_integers(
            new, first_label=max(base.nodes()) + 1)
        new_nodes = new.nodes()
        base_nodes = base.nodes()
        new_base = nx.compose(base, new)

        # Shuffle base nodes for random selection
        random.shuffle(base_nodes)

        # Take the first k nodes from the shuffled base nodes; then, with probability p,
        # connect each of them to every node in new_nodes.
        for n in base_nodes[0:k]:
            edge_test = zip(random.uniform(size=len(new_nodes)), new_nodes)
            for d, m in edge_test:
                if (d <= p):
                    new_base.add_edge(m, n)

        # Makes graph fully connected in the WS way
        while nx.number_connected_components(new_base) > 1:
            mc_nodes = nx.connected_component_subgraphs(new_base)[0].nodes()
            cc_nodes = nx.connected_component_subgraphs(new_base)[1].nodes()
            new_edges = list()
            for i in range(k):
                random.shuffle(mc_nodes)
                random.shuffle(cc_nodes)
                new_edges.append((mc_nodes[0], cc_nodes[0]))
            new_base.add_edges_from(new_edges)
        new_base.name = ""
        return (new_base)

    # Node ceiling of 100 nodes ensures a fully connected graph
    def node_ceiling(G):
        if G.number_of_nodes() >= 100:
            return False
        else:
            return True

    # Setup GMM
    watts_strogatz_model = gmm.gmm(test_graph)
    watts_strogatz_model.set_termination(node_ceiling)
    watts_strogatz_model.set_rule(watts_strogatz_growth)
    sim_name = "GMM(p[" + str(p) + "])_" + watts_strogatz_model.get_base().name
    gmm.algorithms.simulate(watts_strogatz_model,
                            tau=4,
                            poisson=True,
                            seed=seed,
                            new_name=sim_name)

    # Retrieve the simulated graph
    simulated_watts_strogatz = watts_strogatz_model.get_base()

    # Print the results to stdout
    if verbose:
        print(nx.info(simulated_watts_strogatz))
        print("")

    # Return the simulated graph
    return (simulated_watts_strogatz)
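A hypothetical invocation, seeding the simulation with a small Watts-Strogatz graph (the generator parameters are assumptions):

import networkx as nx

seed_graph = nx.watts_strogatz_graph(50, k=4, p=0.1)
seed_graph.name = "WS(50,4,0.1)"
result = watts_strogatz_simulation(seed_graph, k=4, p=0.1, verbose=True)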
Example #19
def main(argin='./',adaptFlag = False):
    # output file

    # initialize EKF
    #Qkin = np.array([[20.0]])#continuous-time integration value
    Qkin = np.array([[20.0]])#Euler integration value
    Rkin = np.array([ [0.0001] ])
    GMM = gmm.gmm(2,25,Qkin,Rkin,stateDerivativeGMM,stateJacobian,stateProcessInfluence,measurementFunction,measurementJacobian)

    dt = 0.01
    tfin = 10.0
    nSteps = int(tfin/dt)
    tsim = 0.0

    muk = np.array([1.0,0.0])
    Pk = np.array([[0.1,0.0],[0.0,1.0]])
    xk = np.random.multivariate_normal(muk,Pk)
    yk = simMeasurementFunction(xk,tsim)

    # initial covariance
    GMM.init_monte(xk,Pk)

    ## true state
    xt = np.zeros((nSteps,2))
    ## discretized PDF value
    XK = np.zeros((nSteps,2,GMM.aki.shape[1]))
    pk = np.zeros((nSteps,GMM.aki.shape[1]))
    alphai = np.zeros((nSteps,GMM.aki.shape[1]))
    Pki = np.zeros((nSteps,2,2,GMM.aki.shape[1]))
    yt = np.zeros(nSteps)
    tplot = np.zeros(nSteps)

    t1 = time.time()
    for k in range(nSteps):
        # log
        tplot[k] = tsim
        xt[k,:] = xk.copy()
        (XK[k,:,:],pk[k,:]) = GMM.get_pdf()
        alphai[k,:] = GMM.alphai.copy()
        Pki[k,:,:,:] = GMM.Pki.copy()
        # propagate filter
        GMM.propagate_normal(dt)
        # simulate
        y = sp.odeint(stateDerivative,xk,np.array([tsim,tsim+dt]),args=([],) )
        xk = y[-1,:].copy()
        # update time
        tsim = tsim + dt
        # measurement
        ymeas = simMeasurementFunction(xk,tsim)
        # store measurement
        yt[k] = ymeas[0]
        # update EKF
        GMM.update(ymeas)
        print("%f,%f" % (tsim,ymeas[0]))
        # resample?
        GMM.resample()
    t2 = time.time()

    print('Completed simulation in %f seconds' % (t2-t1))

    print(XK.shape,tplot.shape)

    # len(tplot) x Ns matrix of times
    tMesh = np.kron(np.ones((GMM.aki.shape[1],1)),tplot).transpose()

    # find the max of the PDF for the maximum likelihood estimate
    xml = np.zeros((nSteps,2))
    Pkk = np.zeros((nSteps,2))
    for k in range(nSteps):
        idmax = np.argmax(pk[k,:])
        xml[k,:] = XK[k,:,idmax].transpose()
        # compute the covariance
        mu = np.zeros(2)
        for j in range(GMM.aki.shape[1]):
            mu = mu + alphai[k,j]*XK[k,:,j]
        Pxx = np.zeros((2,2))
        for j in range(GMM.aki.shape[1]):
            Pxx = Pxx + alphai[k,j]*(Pki[k,:,:,j] + np.outer(XK[k,:,j]-mu,XK[k,:,j]-mu))
        #print("%f,%f|%f,%f,%f,%f" % (mu[0],mu[1],Pxx[0,0],Pxx[0,1],Pxx[1,0],Pxx[1,1]))
        Pkk[k,0] = Pxx[0,0]
        Pkk[k,1] = Pxx[1,1]

    fig = plt.figure()

    ax = []
    for k in range(4):
        if k < 2:
            nam = 'x' + str(k+1)
        else:
            nam = 'e' + str(k-1)
        ax.append( fig.add_subplot(2,2,k+1,ylabel=nam) )
        if k < 2:
            if k == 0:
                # plot the discrete PDF as a function of time
                mex = tMesh.reshape((len(tplot)*GMM.aki.shape[1],))
                mey = XK[:,0,:].reshape((len(tplot)*GMM.aki.shape[1],))
                mez = pk.reshape((len(tplot)*GMM.aki.shape[1],))
            elif k == 1:
                # plot the discrete PDF as a function of time
                mex = tMesh.reshape((len(tplot)*GMM.aki.shape[1],))
                mey = XK[:,1,:].reshape((len(tplot)*GMM.aki.shape[1],))
                mez = pk.reshape((len(tplot)*GMM.aki.shape[1],))

            idx = mez.argsort()
            mexx,meyy,mezz = mex[idx],mey[idx],mez[idx]

            cc = ax[k].scatter(mexx,meyy,c=mezz,s=20,edgecolor='')
            fig.colorbar(cc,ax=ax[k])
            # plot the truth
            ax[k].plot(tplot,xt[:,k],'b-')
        elif k < 4:
            ax[k].plot(tplot,xt[:,k-2]-xml[:,k-2],'b-')
            ax[k].plot(tplot,3.0*np.sqrt(Pkk[:,k-2]),'r--')
            ax[k].plot(tplot,-3.0*np.sqrt(Pkk[:,k-2]),'r--')
        ax[k].grid()
    fig.show()

    raw_input("Return to exit")

    print("Completed test_enky.py")
    return
Example #20
    pos = np.zeros((img.shape[0], img.shape[1], 2))
    for i in range(pos.shape[0]):
        for j in range(pos.shape[1]):
            pos[i, j, :] = [i, j]
    pos = pos.reshape((-1, 2))
    data = np.hstack((pos, np.reshape(img, (-1, 3))))
    # k_means
    res = k_means(data, 3, iter_times=20, dist_func=dist)
    tag = res[:, -1]
    tag = np.reshape(tag, c.shape)
    plt.figure(2)
    plt.imshow(tag)
    plt.axis('off')
    plt.show()
    # gmm
    res = gmm(data, 3, iter_times=20)
    tag = res[:, -1]
    tag = np.reshape(tag, c.shape)
    plt.figure(3)
    plt.imshow(tag)
    plt.axis('off')
    plt.show()
    # dbscan
    res = dbscan(data, 10, 100, dist_func=dist)
    tag = res[:, -1]
    tag = np.reshape(tag, c.shape)
    plt.figure(4)
    plt.imshow(tag)
    plt.axis('off')
    plt.show()
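This fragment assumes an image img, an array c whose shape matches the label image, and a distance function dist defined earlier. A minimal Euclidean dist consistent with how k_means and dbscan are called here (the signature is an assumption):

import numpy as np

def dist(a, b):
    # Assumed: plain Euclidean distance over the [row, col, R, G, B] feature vector
    return np.linalg.norm(np.asarray(a, dtype=float) - np.asarray(b, dtype=float))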
Example #21
def barabasi_albert_simulation(test_graph, m, seed=None, verbose=False):
    """
    Given a set of Barabasi-Albert "preferential attachment" random graphs, simulate using above growth rule
    """

    # Required to change the growth rule dynamically, so
    # we define it inside the simulation function
    def barabasi_albert_growth(base, new):
        """
        Select m random nodes from new_nodes and connect each node to the nodes in base_nodes
        as a function of the degree of the node in base_nodes.  The basic "preferential 
        attachment" model.
        """

        # To keep new nodes from over-writing current ones rename the new nodes starting
        # from the last node in base
        new = nx.convert_node_labels_to_integers(
            new, first_label=max(base.nodes()) + 1)
        new_nodes = new.nodes()
        base_nodes = base.nodes()
        new_base = nx.compose(base, new)

        # Shuffle new_nodes
        random.shuffle(new_nodes)

        # Create edge test based on degree centrality
        base_degree = nx.degree_centrality(base).items()

        # Create "preferential attachment" structure by connecting m
        # nodes from new structure to nodes in base as a function of
        # base nodes' degree centrality
        for i in xrange(m):
            edge_made = False
            while edge_made is False:
                # Randomly select a node in base and add connection
                # based on its degree centrality
                p = random.uniform()
                j = random.randint(0, len(base_nodes))
                if p <= base_degree[j][1]:
                    k = random.randint(
                        len(new_nodes))  # Randomly select a new node
                    new_base.add_edge(new_nodes[k], base_degree[j][0])
                    edge_made = True
        return (new_base)

    # Simple node ceiling of 1,000 nodes
    def node_ceiling(G):
        if G.number_of_nodes() >= 1000:
            return False
        else:
            return True

    # Setup GMM
    barabasi_albert_model = gmm.gmm(test_graph)
    barabasi_albert_model.set_termination(node_ceiling)
    barabasi_albert_model.set_rule(barabasi_albert_growth)
    sim_name = "GMM SIMULATION-" + barabasi_albert_model.get_base().name
    gmm.algorithms.simulate(barabasi_albert_model,
                            tau=3,
                            poisson=True,
                            seed=seed,
                            new_name=sim_name)

    # Retrieve the simulated graph
    simulated_barabasi_albert = barabasi_albert_model.get_base()

    # Print the results to stdout
    if verbose:
        print(nx.info(simulated_barabasi_albert))
        print("")

    # Return the simulated graph
    return (simulated_barabasi_albert)
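As with the Watts-Strogatz case, a hypothetical invocation (the generator parameters are assumptions):

import networkx as nx

seed_graph = nx.barabasi_albert_graph(100, 3)
seed_graph.name = "BA(100,3)"
result = barabasi_albert_simulation(seed_graph, m=3, verbose=True)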
Example #22
def execute(lib=__library__):
    start = datetime.datetime.now()
    print "START TIME", str(start)
    
    # fetch shape of images
    img = images.fetch_disp("bm", 1, __library__)
    sh = (int(np.shape(img)[0]), int(np.shape(img)[1]))
    shape = (1080, 1920)

    # check if ground is available
    img = images.fetch_ground(1, __library__)
    ground_avail = img is not None

    if ground_avail:
        # shape = list(np.array(shape) * 2)
        shape = (shape[0], shape[1])
    else:
        # print shape
        shape = (shape[0], shape[1] * 2)

    print "starting..."
    
    global __begin__
    global __end__

    for alg in __algs__:
        if __timer__:
            tm = timer(__begin__, __end__)
        mx = 0

        overall_diff = np.array([], dtype=__dtype__)
        file_num = 0
        file_count = 0
        max_count = 10

        found_count = 0
        full_count = 0
#        cv2.imshow('1',  images.fetch_disp(alg, 1, __library__))
#        cv2.imshow('1b', (images.fetch_ground(1, __library__).astype('float') - images.fetch_disp(alg, 1, __library__)[:,:,0].astype('float')).astype('uint8'))
#        cv2.imshow('2',  images.fetch_disp(alg, 2, __library__))
#        cv2.imshow('2b', (images.fetch_ground(2, __library__).astype('float') - images.fetch_disp(alg, 2, __library__)[:,:,0].astype('float')).astype('uint8'))
        cv2.waitKey()
        if __save_data_only__:
            print 'fetching data'
            print __library__
            for i in range(__begin__, __end__ + 1):
                orig = images.fetch_orig(i, __library__)
                ground = images.fetch_ground(i, __library__)
                disp = images.fetch_disp(alg, i, __library__)

#                cv2.imshow('disp', disp)
#                cv2.imshow('diff', (ground.astype('float') - disp[:,:,0].astype('float')).astype('uint8'))
#                cv2.waitKey()

                if orig is None: print 'orig is None'
                if ground is None: print 'ground is None'
                if disp is None: print 'disp is None'

                if disp is not None and ground_avail:
                    if len(disp.shape) == 3: disp = disp[:,:,0]

                    diff = ((ground.astype(__dtype__)) - (disp.astype(__dtype__))).astype(__dtype__)
                    full_count += np.product(diff.shape)

                    # exclude not calculated
                    flat = diff[np.where(disp != 0)]
                    flat_ground = ground[np.where(disp != 0)]
                    flat = flat[np.where(flat_ground != 0)]

                    found_count += len(np.nonzero(flat.flatten())[0])

                    # exclude incalculable values
                    # diff[ground > 16*6] = 0

                    diff = np.array(diff, dtype=__dtype__)

                    overall_diff = np.concatenate((overall_diff, flat))

                    if file_count == max_count:
                        print 'saving', '%s/data/data_%s_%d.npz' % (lib, alg, file_num), i
                        np.savez('%s/data/data_%s_%d.npz' % (lib, alg, file_num), data=overall_diff)
                        overall_diff = np.array([], dtype=__dtype__)

                        file_count = 1
                        file_num += 1
                    else:
                        file_count += 1

                if __timer__:
                    tm.progress(i)

            print 'found', found_count, 'of', full_count

        if full_count != 0:
            print float(found_count) / full_count * 100.0
#             np.savez('%s/data/data_%s_%d.npz' % (lib, alg, file_num), data=overall_diff)
    if __save_data_only__: return

    data_files = list()
    total = __end__ - __begin__ + 1
    # range(int(math.ceil(total / float(max_count))))
    
    for g in __G__:
        print
        print 'calculating for G =', g
        for alg in __algs__:
            print 'Calculating for alg', alg

            data_files = ['%s/data/data_%s_%d.npz' % (lib, alg, i) for i in range(0, int(math.floor((__end__ - __begin__) / 10.)))]
            print data_files
            # data_files = ['%s/data/data_%s_%d.npz' % (lib, alg, i) for i in range(int(__begin__ / 10.), int(math.ceil(__end__ / 10.)))]
        #if __library__ == 'tsukuba': # data_files=data_files[:-1]
            print 'creating model'
            model = gmm(data_files, debug=__dbg__, history=True, timer=__timer__, limit=limit)
            model.fit_model(g)
            model.save_results(library=__library__, alg=alg, hist=True)
            # model.draw_results()
            print "END TIME ", str(datetime.datetime.now())
            print "TIME PASSED", str(datetime.datetime.now() - start)
            print
            # pp.show()

        print "done"
Example #23
            plt.scatter(points[:, 0], points[:, 1])
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(
            "k-means clusters (with Euclidean distance) over data{}".format(k))
        print("method: kmeans - data{} - euclidean distance".format(k))
        etr = Estimater()
        etr.get_internal_index(results, euclidean_distance)
        results = np.hstack((results, data[:, -1:]))
        etr.get_external_index(results)
    plt.show()

    # gmm
    for k in range(1, 4):
        data = np.load("data/{}.npy".format(k))
        results = gmm(data[:, :-1], k=3)
        plt.subplot(1, 3, k)
        for i in range(10):
            points = results[results[:, -1] == i]
            plt.scatter(points[:, 0], points[:, 1])
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title("gmm clusters over data{}".format(k))
        print("method: gmm - data{}".format(k))
        etr = Estimater()
        etr.get_internal_index(results, euclidean_distance)
        results = np.hstack((results, data[:, -1:]))
        etr.get_external_index(results)
    plt.show()

    # dbscan
Example #24
File: gmm.py Project: vishalbelsare/GMM
            return self.base
        else:
            return self.rule(self.base,new)
            
    def am_gmm(self):
        """Simple function to test if object is a gmm"""
        return self.am_gmm
            

if __name__ == '__main__':
    # Create most basic GMM object with five node cycle graph as base.
    
    import gmm
    
    G=nx.cycle_graph(5)
    model=gmm.gmm(G)
    
    # Using five node cycle graph, create gmm object with node ceiling
    # termination rule of 100 nodes
    
    def degree_ceiling(G):
        if G.number_of_nodes()>=100:
            return True
        else:
            return False
    
    model=gmm.gmm(G,degree_ceiling)
    
    # Finally, add a simple random growth rule
    
    def rand_add(base, new):
Example #25
    def setUp(self):
        # Most basic GMM; no growth or termination rules.
        self.base_model = gmm.gmm(self.five_cycle)
        self.base_directed = gmm.gmm(
            copy.deepcopy(self.five_cycle).to_directed())
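The five_cycle fixture is not shown; consistent with the __main__ block in Example #24, it is presumably a five-node cycle graph created earlier in the test setup:

import networkx as nx

five_cycle = nx.cycle_graph(5)  # the test presumably stores this as self.five_cycle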
Example #26
    lda = dimred.LDA(data, classes, labels, center=True, scale=True)
    projData = lda.transform(data)

    x = projData[0, :]
    y = projData[1, :]

    points = splitClasses(data, classes, labels)
    gaussians = [density.Gaussian(data=p) for p in points]
    means = np.array([g.mean() for g in gaussians]).T
    covs = np.array([g.cov() for g in gaussians])
    nums = np.array([p.shape[1] for p in points])
    weights, nll = gmm.calcresps(data, nums, means, covs, hard=hard)

    nums, means, covs, nll = gmm.gmm(data,
                                     weights,
                                     K=3,
                                     hard=hard,
                                     diagcov=False)
    numsd, meansd, covsd, nlld = gmm.gmm(data,
                                         weights,
                                         K=3,
                                         hard=hard,
                                         diagcov=True)

    print 'Question 2'
    print 'NLL for diagcov=False: ', nll
    print 'NLL for diagcov=True:  ', nlld
    print 'The diagcov=False seems to be the better choice.'
    print 'Because we have enough data per class we can use a full estimate '
    print 'of the covariance matrix without over-fitting. '
    print 'These extra parameters allow us to make a better model which is '
Example #27
    def setUp(self):
        # Most basic GMM; no growth or termination rules.
        self.base_model=gmm.gmm(self.five_cycle)
        self.base_directed=gmm.gmm(copy.deepcopy(self.five_cycle).to_directed())
Example #28
def gmm_demo():
    df = load_data()
    df = extract_features(df)
    features_list = list(df.columns.values)[1:]
    gmm(df, features_list)
Example #29
    ib = []

    for row in data:

        Ib = eval(row[6])
        ib.append(Ib)

    ib = np.array(ib)

    def ibase():
        return ib

    C = 3

    (mu_est, sigma_est, p_est, counter, difference) = gmm(ib,C,1e-5)

    print('___ Ibase Parameters ___')
    print('------Means--------')
    print('mu_1=%1.4f'%mu_est[0])
    print('mu_2=%1.4f'%mu_est[1])
    print('mu_3=%1.4f'%mu_est[2])

    print('------Variance--------')
    print('sigma_1=%1.4f'%sigma_est[0])
    print('sigma_2=%1.4f'%sigma_est[1])
    print('sigma_3=%1.4f'%sigma_est[2])

    print('------Weights-------')
    print('W_1=%1.4f'%p_est[0])
    print('W_2=%1.4f'%p_est[1])
Example #30
File: main.py Project: secg95/IBIO4490
    if "xy" in opts.color and opts.method != "watershed":
        temp = img.copy()
        img = np.zeros((img.shape[0], img.shape[1], img.shape[2] + 2))
        img[:, :, 0:temp.shape[2]] = temp
        img[:, :, temp.shape[2]] = np.array(range(img.shape[0])).reshape(
            img.shape[0], 1)
        img[:, :, temp.shape[2] + 1] = np.array(range(img.shape[1])).reshape(
            1, img.shape[1])

    # execute the requested clustering method
    if "watershed" in opts.method:
        clustering = watershed(img, opts.k)
    if "kmeans" in opts.method:
        clustering = kmeans(img, opts.k)
    if "gmm" in opts.method:
        clustering = gmm(img, opts.k)
    if "hierarchical" in opts.method:
        clustering = hierarchical(img, opts.k)

    # read the truth
    truth = sio.loadmat(opts.img_file.replace('jpg', 'mat'))
    truth = truth['groundTruth'][0, 4][0][0]['Segmentation']
    # plot and save the results with a nice title
    title = opts.img_file.split("/")[-1].replace(".jpg", "") + "_k=" + str(
        opts.k) + "_" + opts.method
    showSaveResults(imgoriginal, clustering, truth, title)

    # calculate and report mutual information
    print("Mutual information (more is better): " +
          str(mutual_info_score(truth.flatten(), clustering.flatten())))
    print(
Example #32
if __name__ == '__main__':
    from pandas import read_csv
    import gmm

    #Load datasets using pandas interface
    data1 = read_csv("BankNoteAuthentication.csv")
    data2 = read_csv("WineQuality-WhiteWine.csv")

    data1.dropna(axis="columns", how="any", inplace=True)
    data1.dropna(axis="columns", how="any", inplace=True)

    #Data set 1 clustered

    trainee1 = data1[["skewness", "curtosis"]]

    model1 = gmm.gmm(clusters=5, iter=25, randSeed=42)
    normalized1 = model1.normalizeSet(trainee1)

    model1.trainModel(normalized1)
    model1.draw(normalized1,
                model1.u,
                model1.sig,
                xAxis=trainee1.columns.values[0],
                yAxis=trainee1.columns.values[1])

    #Data set 2 clustered
    trainee2 = data2[["total sulfur dioxide", "chlorides"]]

    model2 = gmm.gmm(clusters=2, iter=50, randSeed=42)
    normalized2 = model2.normalizeSet(trainee2)
Example #33
 def __init__(self):
     self.gmm = gmm()
Example #34
def main():
    # Ensure all data has been downloaded and processed
    #utils.download_trips_dataset()
    #for y in utils.YEARS:
    #   utils.load_trips_dataframe(y)
    #   process_trips(trips_df)

    np.random.seed(1)

    rc('font', family='serif')

    station_info = utils.load_station_info()
    start_time_matrix, station_idx, time_idx, time_at_idx = utils.load_start_time_matrix(
    )
    stop_time_matrix, _, _ = utils.load_stop_time_matrix()
    start_time_matrix = start_time_matrix.astype(np.int16)
    stop_time_matrix = stop_time_matrix.astype(np.int16)
    inverse_station = {v: k for k, v in station_idx.items()}
    flow_matrix = stop_time_matrix - start_time_matrix

    print("Total data points (excluding inactive stations): {:,}".format(
        np.sum(utils.construct_active_stations_by_bucket(start_time_matrix))))

    # Figure 1 Total Volume
    plot_total_start_trips(start_time_matrix, time_idx)

    # Figure 2 Avg Week
    select_stations = [195, 511]
    plot_avg_week_for_stations(start_time_matrix, station_idx, station_info,
                               select_stations,
                               "Number of trips started at station over week",
                               "avg_week_start_time.pdf")
    plot_avg_week_for_stations(stop_time_matrix, station_idx, station_info,
                               select_stations,
                               "Number of trips stopped at station over week",
                               "avg_week_stop_time.pdf")
    plot_avg_week_for_stations(
        flow_matrix,
        station_idx,
        station_info,
        select_stations,  #360, 195, 146, 432, 161, 497, 517], 
        "Net change in bikes at station over week",
        "avg_week_flow_time.pdf",
        "Number of Arriving Trips")

    # # Cluster stations
    print("Clustering stations")
    avg_weekly_flow = utils.get_station_agg_trips_over_week(
        flow_matrix, np.mean)
    cluster_assignments, means, ppc = gmm.gmm(avg_weekly_flow,
                                              K=3,
                                              posterior_predictive_check=True)

    # Figure 4 Cluster Means
    # Plot weekly graph for mean of each cluster
    plot_cluster_means(means, time_at_idx)

    # Figure 6 Station Map
    # Plot clustered stations on map
    plot_map(cluster_assignments, station_info, station_idx, inverse_station)

    # Figure 7 Posterior Predictive Check
    # Plot the posterior predictive check
    random_indices = randint(0, flow_matrix.shape[0], size=150)
    plot_posterior_predictive_check(
        flow_matrix[random_indices], ppc[0][random_indices],
        "Posterior Predictive Check on average flow", "post_pred_check.pdf")

    # Predictions
    # Figure 8 Accuracy
    plot_average_predictor_error(start_time_matrix, flow_matrix)
    plot_seasonal_average_predictor_error(start_time_matrix, flow_matrix)
    plot_cluster_predictor_error(start_time_matrix, flow_matrix,
                                 cluster_assignments, means)

    # Figure 9 Seasonal Prediction
    plot_predicted_total_start_trips(start_time_matrix)

    # Figure 10 Sample across three seasons
    plot_cluster_predictor_sample_error(start_time_matrix, flow_matrix,
                                        cluster_assignments, means)

    # Figures not used in the paper

    # Some interesting stations: 3412, 3324, 3285, 3286, 3153, 360, 195, 2023, 3095, 432, 511, 438
    plot_avg_week_for_stations(
        flow_matrix,
        station_idx,
        station_info, [360, 195, 497, 146, 161],
        "Net change in bikes at station over week (normalized)",
        "normalized_avg_week_flow_time.pdf",
        normalize=True)
    plot_avg_week_for_stations(
        flow_matrix,
        station_idx,
        station_info, [360, 195, 497, 146, 161],
        "Net change in bikes at station over week (normalized, rounded)",
        "normalized_round_avg_week_flow_time.pdf",
        normalize=True,
        round=True)
    plot_avg_week_for_stations(
        flow_matrix,
        station_idx,
        station_info,
        None,
        "Net change in bikes at station over week (normalized)",
        "normalized_all_avg_week_flow_time.pdf",
        normalize=True)
    plot_avg_week_for_stations(flow_matrix, station_idx, station_info, None,
                               "Net change in bikes at station over week ",
                               "all_avg_week_flow_time.pdf")
    plot_avg_week_for_stations(
        flow_matrix,
        station_idx,
        station_info,
        None,
        "Net change in bikes at station over week (normalized, rounded)",
        "normalized_round_all_avg_week_flow_time.pdf",
        normalize=True,
        round=True)

    # Plot clustered stations in 2d
    plot_clustered_stations_2d(avg_weekly_flow, cluster_assignments, means,
                               inverse_station)

    # Predictions
    plot_predicted_flow_baseline(flow_matrix)
Example #35
def watts_strogatz_simulation(test_graph, k, p, seed=None, verbose=False):
    """
    Given a set of Watts-Strogatz "small-world" random graphs, simulate using above growth rule
    """
    
    # Required to change the growth rule dynamically, so 
    # we define it inside the simulation function
    def watts_strogatz_growth(base, new):
        """
        Select k random nodes from new_nodes and connect each node to the nodes in base_nodes
        with probability p.  
        """

        # To keep new nodes from over-writing current ones rename the new nodes starting
        # from the last node in base
        new=nx.convert_node_labels_to_integers(new,first_label=max(base.nodes())+1)
        new_nodes=new.nodes()
        base_nodes=base.nodes()
        new_base=nx.compose(base,new)

        # Shuffle base nodes for random selection
        random.shuffle(base_nodes)

        # Take the first k nodes from the shuffled base nodes; then, with probability p,
        # connect each of them to every node in new_nodes.
        for n in base_nodes[0:k]:
            edge_test=zip(random.uniform(size=len(new_nodes)), new_nodes)
            for d,m in edge_test:
                if (d <= p):
                    new_base.add_edge(m,n)
                    
        # Makes graph fully connected in the WS way
        while nx.number_connected_components(new_base)>1:
            mc_nodes=nx.connected_component_subgraphs(new_base)[0].nodes()
            cc_nodes=nx.connected_component_subgraphs(new_base)[1].nodes()
            new_edges=list()
            for i in range(k):
                random.shuffle(mc_nodes)
                random.shuffle(cc_nodes)
                new_edges.append((mc_nodes[0], cc_nodes[0]))
            new_base.add_edges_from(new_edges)
        new_base.name=""
        return(new_base)
    
    # Node ceiling of 100 nodes ensures a fully connected graph
    def node_ceiling(G):
        if G.number_of_nodes()>=100:
            return False
        else:
            return True

    # Setup GMM
    watts_strogatz_model=gmm.gmm(test_graph)
    watts_strogatz_model.set_termination(node_ceiling)
    watts_strogatz_model.set_rule(watts_strogatz_growth)
    sim_name="GMM(p["+str(p)+"])_"+watts_strogatz_model.get_base().name
    gmm.algorithms.simulate(watts_strogatz_model,tau=4, poisson=True, seed=seed, new_name=sim_name)
    
    # Retrieve the simulated graph
    simulated_watts_strogatz=watts_strogatz_model.get_base()
    
    # Print the results to stdout
    if verbose:
        print(nx.info(simulated_watts_strogatz))
        print("")
        
    # Return the simulated graph
    return(simulated_watts_strogatz)
Example #36
    data, classes, labels = readData(os.path.join('..','data','wine.data'))
    
    lda = dimred.LDA(data, classes, labels, center=True, scale=True)
    projData = lda.transform(data)
    
    x = projData[0,:]
    y = projData[1,:]
    
    points = splitClasses(data,classes,labels)
    gaussians = [density.Gaussian(data=p) for p in points]
    means = np.array([g.mean() for g in gaussians]).T
    covs = np.array([g.cov() for g in gaussians])
    nums = np.array([ p.shape[1] for p in points ])
    weights,nll = gmm.calcresps(data, nums, means, covs, hard=hard)
    
    nums, means, covs, nll = gmm.gmm(data, weights, K=3, hard=hard, diagcov=False)
    numsd,meansd,covsd,nlld = gmm.gmm(data, weights, K=3, hard=hard, diagcov=True)
    
    print 'Question 2'
    print 'NLL for diagcov=False: ', nll
    print 'NLL for diagcov=True:  ', nlld
    print 'The diagcov=False seems to be the better choice.'
    print 'Because we have enough data per class we can use a full estimate '
    print 'of the covariance matrix without over-fitting. '
    print 'These extra parameters allow us to make a better model which is '
    print 'evident in the difference in negative log-likelihood (NLL).\n'
    
    weights,nll = gmm.calcresps(data, nums, means, covs, hard=hard)

    pl.figure()
    
Example #37
labeled_images = preprocess.load_preprocessed(path + "/Processed", FEATURES_FILE) 
labeled_images_seg = preprocess.load_preprocessed(seg_path, FEATURES_SEG_FILE) 

cluster_counts = accuracy_rate.counts_per_cluster(labeled_images)
cluster_counts_seg = accuracy_rate.counts_per_cluster(labeled_images_seg)

img_features = pca.pca_features(FEATURES_FILE)
features_df = pd.DataFrame(img_features)
labeled_features = labeled_images.join(features_df)

img_features_seg = pca.pca_features(FEATURES_SEG_FILE)
features_seg_df = pd.DataFrame(img_features_seg)
labeled_features_seg = labeled_images_seg.join(features_seg_df)


gmm_labels, gmm_score = gmm.gmm(img_features)
gmm_labels_seg, gmm_score_seg = gmm.gmm(img_features_seg)

kmeans_labels, kmeans_score = kmeans.kmeans(img_features)
kmeans_labels_seg, kmeans_score_seg = kmeans.kmeans(img_features_seg)

labeled = accuracy_rate.assigned_images(labeled_images, gmm_labels, kmeans_labels, 0)
accuracy_rate.output_files(labeled, 0)
voted_labels = accuracy_rate.match_labels(labeled, cluster_counts, 0)

labeled_seg = accuracy_rate.assigned_images(labeled_images_seg, gmm_labels_seg, kmeans_labels_seg, SEG_NUM)
accuracy_rate.output_files(labeled_seg, SEG_NUM)
voted_labels_seg = accuracy_rate.match_labels(labeled_seg, cluster_counts_seg, SEG_NUM)

gmm_rate, kmeans_rate = get_accuracy_rate(labeled, voted_labels)
gmm_rate_seg, kmeans_rate_seg = get_accuracy_rate(labeled_seg, voted_labels_seg)