Example #1
 def dbscan(self,D,eps,MinPts):
     self.dataSet = D 
     C = -1
     Noise = cluster('Noise')
     fig = plt.figure()
     ax = plt.axes(projection='3d')
     ax.set_title('dbscaned data')
     for point in D:
         if point not in self.visited:
             self.visited.append(point)
             NeighbourPoints = self.regionQuery(point,eps)
             
             if len(NeighbourPoints) < MinPts:
                 Noise.addPoint(point)
             else:
                 name = 'Cluster'+str(self.count)
                 C = cluster(name)
                 self.count+=1
                 self.expandCluster(point,NeighbourPoints,C,eps,MinPts)
                 ax.plot(C.getX(),C.getY(),C.getZ(),'.',label=name)
     if len(Noise.getPoints())!=0:
         ax.plot(Noise.getX(),Noise.getY(),Noise.getZ(),'.',label='Noise')
     plt.legend(loc='lower left')
     #plt.show()
     plt.savefig('dbscaned.png')
Example #2
 def dbscan(self,D,eps,MinPts):
     self.dataSet = D
     
     title(r'DBSCAN Algorithm', fontsize=18)
     xlabel(r'Dim 1',fontsize=17)
     ylabel(r'Dim 2', fontsize=17)
     
     C = -1
     Noise = cluster('Noise')
     
     for point in D:
         if point not in self.visited:
             self.visited.append(point)
             NeighbourPoints = self.regionQuery(point,eps)
             
             if len(NeighbourPoints) < MinPts:
                 Noise.addPoint(point)
             else:
                 name = 'Cluster'+str(self.count);
                 C = cluster(name)
                 self.count+=1;
                 self.expandCluster(point,NeighbourPoints,C,eps,MinPts)
                 
                 plot(C.getX(),C.getY(),'o',label=name)
                 hold(True)
     
     if len(Noise.getPoints())!=0:
         plot(Noise.getX(),Noise.getY(),'x',label='Noise')
         
     hold(False)
     legend(loc='lower left')
     grid(True)
     show()
Example #3
def execute2(parametersobject):
    parametersobject.read_derived()
    if not parametersobject.parameterdic['Replot_only']:
        analyse_trjs(parametersobject)
        if parametersobject.parameterdic['Plot_energy']:
            get_energies(parametersobject)
        contactmap_getdata(parametersobject)
        cluster(parametersobject)
    graph_angles(parametersobject)
    contactmap_draw(parametersobject)
    cluster_dotplot(parametersobject)
Example #4
def main():
    screen_size = [1120, 630]
    screen = display.set_mode(screen_size)
    display.set_caption("Antivirus")
    main_scr = image.load('data/main.png').convert()
    main_bar = image.load('data/bar/bar.png').convert()
    main_btn = [image.load('data/buttons/main.png').convert(), image.load('data/buttons/main1.png').convert()]
    share_btn = [image.load('data/buttons/share.png').convert(), image.load('data/buttons/share1.png').convert()]
    bar_btn = [image.load('data/bar/btn.png').convert(), image.load('data/bar/btn1.png').convert(),
               image.load('data/bar/btno.png').convert()]
    bar_btn1 = [image.load('data/bar/btn10.png').convert(), image.load('data/bar/btn11.png').convert(),
               image.load('data/bar/btn1o.png').convert()]
    inp_box = [image.load('data/input_box/input_box.png').convert(),
               image.load('data/input_box/input_box1.png').convert(),
               image.load('data/input_box/input_box_o.png').convert(),
               image.load('data/input_box/browse.png').convert(),
               image.load('data/input_box/browse1.png').convert()]
    sett = [image.load('data/bar/sett.png').convert(), image.load('data/bar/sett1.png').convert(),
               image.load('data/bar/setto.png').convert()]
    rp = [image.load('data/buttons/radio_buttons/panelc.png').convert(),
          image.load('data/buttons/radio_buttons/panelc1.png').convert(),
          image.load('data/buttons/radio_buttons/panelo.png').convert()]
    rb = [image.load('data/buttons/radio_buttons/btn.png').convert(), image.load('data/buttons/radio_buttons/btn2.png').convert(),
          image.load('data/buttons/radio_buttons/btn1.png').convert(), image.load('data/buttons/radio_buttons/btn3.png').convert()]

    x = radio_panel([radio_button([383, 310, 300, 30], "Full Scan", screen, rb), radio_button([383, 340, 300, 30], "Only Executable Scan", screen, rb)],
                   [383, 300, 300, 30], screen, rp)
    c1 = cluster([button([443, 240, 193, 50], func.test, screen, main_btn), x], [input_box([250, 200, 580, 30], screen, inp_box)],
                 [5, 5, 100, 40], screen, bar_btn)
    c2 = cluster([button([443, 240, 193, 50], func.ps, screen, share_btn)], [input_box([250, 200, 580, 30], screen, inp_box)],
                 [110, 5, 100, 40], screen, bar_btn1)
    c3 = cluster([], [], [1075, 5, 40, 40], screen, sett)
    c = cluster_panel([c1, c2, c3])

    screen_opened = True

    fps = time.Clock()
    while screen_opened:

        screen.blit(main_scr,[0, 0])
        screen.blit(main_bar,[0, 0])
        c.draw()
        display.flip()

        for cur in event.get():
            if cur.type == MOUSEBUTTONUP:
                if cur.button == 1:
                    c.click()
            c.input(cur)
            if cur.type == QUIT:
                screen_opened = False
                quit()

        fps.tick(30)
Example #5
def execute2(parametersobject):
	parametersobject.read_derived()
	if not parametersobject.parameterdic['Replot_only']:
		analyse_trjs(parametersobject)
		if parametersobject.parameterdic['Plot_energy']:
			get_energies(parametersobject)
		contactmap_getdata(parametersobject)
		cluster(parametersobject)
	graph_angles(parametersobject)
	contactmap_draw(parametersobject)
	cluster_dotplot(parametersobject)
Example #6
def gamma2(x, h, N):
    """Computes the second moment of the cluster size for a given DGFF sample and threshold h."""
    y = levelset(x, h)
    z = cluster(y, N)
    clusterarray = np.bincount(np.bincount(z)[:-1])
    gamma = (clusterarray * np.arange(len(clusterarray))**2).sum()
    return gamma
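Note: a minimal, self-contained sketch (toy labels, not from the original repo, and omitting the [:-1] slice gamma2 applies) of the bincount-of-bincount idiom used above, i.e. summing the squared cluster sizes:

import numpy as np

labels = np.array([0, 0, 1, 1, 1, 2])       # hypothetical per-site cluster labels
sizes = np.bincount(labels)                 # size of each cluster -> [2, 3, 1]
size_counts = np.bincount(sizes)            # how many clusters of each size
second_moment = (size_counts * np.arange(len(size_counts))**2).sum()
print(second_moment)                        # 2**2 + 3**2 + 1**2 = 14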
Example #7
 def searchBK(self, file_in, lmsi):
     file_in.seek(0)
     candidate = []
     while True:
         line1 = file_in.readline()
         if not line1: break
         line2 = file_in.readline().strip()
         line1_list = line1.split('\t')
         line2_list = line2.split('\t')
         readseq1 = line1_list[9].upper()
         readseq2 = line2_list[9].upper()
         pairedread = pairedRead(line1_list[0], line1_list[1], line1_list[2], line1_list[3], line1_list[4],
                                 line1_list[5], line1_list[6], line1_list[7], line1_list[8], readseq1, \
                                 line2_list[0], line2_list[1], line2_list[2], line2_list[3], line2_list[4],
                                 line2_list[5], line2_list[6], line2_list[7], line2_list[8], readseq2)
         if (pairedread.read1.CIGAR == '100M'
                 and pairedread.read2.CIGAR != '100M'
                 and pairedread.read2.isRepete() is False and
             (int(pairedread.read1.POS) < int(pairedread.read2.POS))):
             bkun = pairedread.read2.getbkunit()
             if bkun[0] and bkun[1]:
                 candidate.append(bkun)
     cpos = cluster(candidate, 100, 0)
     print len(cpos)
     pos = []
     for i in range(len(cpos)):
         pos.append(cpos[i][0])
     return pos
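Example #8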
def noise_removal(pcd_list, ratio=0.75):

	"""
	Remove noise by keeping the largest clusters after clustering with DBScan.
	First, we keep the largest cluster C1, then we check if the size of the
	second largest cluster C2 is bigger than C1.size*ratio. If it is, we also
	keep C2 and do the test with C3, and so on.
	"""

	# Cluster the points
	clusters = cluster(pcd_list, 'dbscan', {'eps':0.00005})
	# Get the clusters sizes
	sizes = clustersSize(clusters)
	# Get the largest cluster
	largest_c = getCluster(clusters, sizes[0][0])

	# For each cluster size
	for s in sizes[1:]:
		# Is this cluster at least as large as the largest cluster times the ratio?
		if s[1] >= sizes[0][1]*ratio:
			# keep it if it is the case
			largest_c += getCluster(clusters, s[0])
		else:
			break

	return largest_c
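Note: a minimal, self-contained sketch (hypothetical sizes, no point clouds or DBSCAN helpers) of the keep-largest-clusters rule described in the docstring above:

def keep_largest(sizes, ratio=0.75):
    # sizes: (cluster_id, size) pairs sorted by size, largest first
    kept = [sizes[0][0]]
    for cid, size in sizes[1:]:
        if size >= sizes[0][1] * ratio:
            kept.append(cid)
        else:
            break
    return kept

print(keep_largest([('a', 100), ('b', 90), ('c', 10)]))  # -> ['a', 'b']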
Example #9
    def __init__(self, set_name):
        self.job_root = "job_configs/%s" % set_name
        self.job_files = glob.glob(r'job_configs/%s/job*.json' % set_name)
        self.num_jobs = len(self.job_files)

        self.job_set = []
        self.load_job_set()

        self.clust = cluster(CLUSTER)
Example #10
def muL_ss():
    """
    Start the search with multiple processes; each process calls start_search.
    :return:
    """
    tbw_dict = cluster()
    # {bid1:[(title1,blog1,word1),(title2,blog2,word2)], bid2:[(title4,blog4,word4),(title5,blog5,word5)]}
    for (bid, tbw_list) in tbw_dict.items():
        search_process = multiprocessing.Process(target=start_search, args=(bid, tbw_list))  # one search process per bid
        search_process.start()
    search_process.join()
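Note: a minimal, self-contained sketch (placeholder work function, not the original start_search) of launching one process per key and then joining every process:

import multiprocessing

def work(key, items):
    print(key, len(items))

if __name__ == '__main__':
    jobs = {'bid1': [1, 2, 3], 'bid2': [4, 5]}
    procs = [multiprocessing.Process(target=work, args=(k, v)) for k, v in jobs.items()]
    for p in procs:
        p.start()
    for p in procs:
        p.join()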
Example #11
def muL_ss():
    """
    Start the search with multiple processes; each process calls start_search.
    :return:
    """
    tbw_dict = cluster()
    # {bid1:[(title1,blog1,word1),(title2,blog2,word2)], bid2:[(title4,blog4,word4),(title5,blog5,word5)]}
    for (bid, tbw_list) in tbw_dict.items():
        search_process = multiprocessing.Process(
            target=start_search, args=(bid, tbw_list))  # one search process per bid
        search_process.start()
    search_process.join()
Example #12
def k_means(data, k, iterations):
    """
    Actual K-mean algorithm
    :param data: the data to run the algorithm on. Should be a numpy array.
    :param k: the number of clusters you want to use.
    :return: The list of cluster centers
    """

    # Get min and max of data
    minima, maxima = np.min(data, axis=0), np.max(data, axis=0)

    # Draw cluster centers from uniform distribution
    centers_x = np.random.uniform(minima[0], maxima[0], k)
    centers_y = np.random.uniform(minima[1], maxima[1], k)

    # Initialise the clusters:
    coordinates = [np.array([centers_x[i], centers_y[i]]) for i in range(k)]
    clusters = []
    for center in coordinates:
        clusters.append(cluster(center))
        print(center)

    # Keep track of cluster index for each point
    point_to_cluster = [-1] * data.shape[0]

    # Main loop
    for it in range(iterations):

        # Assign each point to its closest cluster
        for pt_idx in range(data.shape[0]):
            point = data[pt_idx, :]
            closest = get_closest(point, clusters)
            if closest != -1:
                clusters[closest].add_member(pt_idx, point)

                # Remove the point from its previous cluster
                old_idx = point_to_cluster[pt_idx]
                if old_idx != -1:
                    clusters[old_idx].remove_member(pt_idx)

                # Update the record of the point cluster
                point_to_cluster[pt_idx] = closest

        # Now that each point has been assigned to its nearest cluster
        # we can update the clusters centers

        for c in clusters:  # avoid shadowing the cluster() constructor
            c.update_center()

    for c in clusters:
        print(c.center)

    return [c.center for c in clusters]
Example #13
def transportBench(numObjects, objectSize, transport, uncached=False):
    f = partial(runTransportBench, numObjects, objectSize, uncached)
    stats = cluster(f,
                    numBackups=0,
                    replicas=0,
                    transport=transport,
                    timeout=240)
    for line in open('%s/mcp.%s.log' % (stats['run'], hosts[0][0])):
        m = re.match('.*METRICS: (.*)$', line)
        if m:
            stats.update(eval(m.group(1)))
    return stats
Example #14
def transportBench(numObjects, objectSize, transport, uncached=False):
    f = partial(runTransportBench, numObjects, objectSize, uncached)
    stats = cluster(f,
                    numBackups=0,
                    replicas=0,
                    transport=transport,
                    timeout=240)
    for line in open('%s/mcp.%s.log' % (stats['run'], hosts[0][0])):
        m = re.match('.*METRICS: (.*)$', line)
        if m:
            stats.update(eval(m.group(1)))
    return stats
Example #15
 def bk_unit(self, file_in):
     print "Geting_breakpoint&unit>>>>>>>>>>>>>>>>>>>>>>>"
     file_in.seek(0)
     bklist = []
     while True:
         line1 = file_in.readline()
         if not line1: break
         line2 = file_in.readline().strip()
         line1_list = line1.split('\t')
         line2_list = line2.split('\t')
         readseq1 = line1_list[9].upper()
         readseq2 = line2_list[9].upper()
         pairedread = pairedRead(line1_list[0], line1_list[1], line1_list[2], line1_list[3], line1_list[4],
                                 line1_list[5], line1_list[6], line1_list[7], line1_list[8], readseq1, \
                                 line2_list[0], line2_list[1], line2_list[2], line2_list[3], line2_list[4],
                                 line2_list[5], line2_list[6], line2_list[7], line2_list[8], readseq2)
         '''readlen1 = len(pairedread.read1.SEQ)
         readlen2 = len(pairedread.read2.SEQ)
         if pairedread.read1.CIGAR == str(readlen1) + "M" and pairedread.read2.CIGAR != str(readlen2) + "M" and \
                         pairedread.read2.CIGAR != "*" and int(pairedread.read1.POS) < int(pairedread.read2.POS):
             # if pairedread.read1.verify(ref) and pairedread.read2.getmsiunit():
             if pairedread.read2.getmsiunit()[0]:
                 bkun = pairedread.read2.getbkunit()  # returns (breakpoint, unit)
                 bklist.append(bkun)
         elif pairedread.read2.CIGAR == str(readlen2) + "M" and pairedread.read1.CIGAR != str(readlen1) + "M" and \
                         pairedread.read1.CIGAR != "*" and int(pairedread.read2.POS) < int(pairedread.read1.POS):
             # if pairedread.read2.verify(ref) and pairedread.read1.getmsiunit():
             if pairedread.read1.getmsiunit()[0]:
                 bkun = pairedread.read1.getbkunit()  # returns (breakpoint, unit)
                 bklist.append(bkun)
         else:
             continue'''
         if pairedread.read1.isMap() and not pairedread.read2.isMap() and pairedread.read2.CIGAR != "*" and \
             int(pairedread.read1.POS) < int(pairedread.read2.POS) and pairedread.read2.getmsiunit()[0]:
             bkun = pairedread.read2.getbkunit()  # returns (unit, breakpoint)
             bklist.append(bkun)
         elif not pairedread.read1.isMap() and pairedread.read2.CIGAR != "*" and pairedread.read2.isMap() and \
             int(pairedread.read2.POS) < int(pairedread.read1.POS) and pairedread.read1.getmsiunit()[0]:
             bkun = pairedread.read1.getbkunit()  # returns (unit, breakpoint)
             bklist.append(bkun)
         else:
             continue
     rebu = cluster(bklist, 30, 1, alpha=0.1)
     keys = []
     bklist = []
     for r in rebu:
         bklist.append(r[0])
     self.bklist = dict(bklist)
     self.unitdic = list(self.bklist.keys())
     return self.bklist, self.unitdic
Example #16
 def do_classify(self,num,count):
     self.count=count
     self.jihe=cluster(self.pure_messages,self.count)
     self.jihe.update_bytime(num)
     i=0
     while(i<count):
         message_temp=[]
         for r in self.jihe.clus[i]:
             temp=RawMessage(r.contain)
             message_temp.append(temp)
         symbol=Symbol(messages=message_temp)
         Format.splitAligned(symbol,doInternalSlick=True)
         self.pure_symbols.append(symbol)
         i=i+1
Example #17
from datetime import datetime

def recall(events=None):

    # step into the cluster
    cluster()

    # reference time to measure against (assumed starting point; the snippet
    # left `when` undefined and an `if event is ...` check unfinished)
    when = datetime.utcnow()

    while True:
        now = datetime.utcnow()

        # spawn a timedelta object
        t = now - when
        
        # establish the threshold in milliseconds
        threshold = t.total_seconds() * 1000

        # what did I - just - perceive?
        if threshold < 5:
            print now
            print 'Threshold crossed'
            print threshold
            
        else :
            print 'threshold not crossed'
            
        # who else perceived it?
        


        
# live and die gracefully.    
if __name__ == '__main__':
    try:
        recall()
    except KeyboardInterrupt:
        print 'stop memories'
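Example #18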
 def add_tensorboard(originData, target, feature_I, feature_V,
                     pretrain_model_time_dir, iterations):
     print "cluster_thread: begin"
     F_I_cluster_np, F_V_cluster_np = cluster(originData, target,
                                              feature_I, feature_V,
                                              pretrain_model_time_dir,
                                              iterations)
     summary = sess.run(summary_merge_cluster_img,
                        feed_dict={
                            F_I_cluster_img:
                            np.expand_dims(F_I_cluster_np, axis=0),
                            F_V_cluster_img:
                            np.expand_dims(F_V_cluster_np, axis=0)
                        })
     training_writer.add_summary(
         summary, tf.train.global_step(sess, global_step))
     print "cluster_thread: added img to summary"
Example #19
    def __init__(self, set_name):
        self.job_root = "job_configs/%s" % set_name
        self.job_files = glob.glob(r'job_configs/%s/job*.json' % set_name)
        self.num_jobs = len(self.job_files)

        self.job_set = [[] for i in range(ARRIVAL_MAX)
                        ]  # simulate one day of 1440 minutes
        self.load_job_set()

        self.job_queue = []
        self.starve_queue = []
        self.job_running = []

        self.clust = cluster(CLUSTER)

        # statistical variable
        self.total_time = 0
Example #20
 def dbscan(D,eps,MinPts):
     dataSet = D
     
     C = -1
     Noise = cluster('Noise')
     
     for point in D:
         if point not in visited:
             visited.append(point)
             NeighbourPoints = _regionQuery(point,eps)
             
             if len(NeighbourPoints) < MinPts:
                 Noise.addPoint(point)
             else:
                 name = 'Cluster'+str(count);
                 C = cluster(name)
                 count+=1;
                 expandCluster(point,NeighbourPoints,C,eps,MinPts)
Example #21
    def dbscan(self, D, ids, eps, MinPts):
        self.dataSet = ids
        self.Distance_values = D
        #print self.Distance_values
        C = -1
        for file in ids:
            if file not in self.visited:
                #self.visited.append(file)
                NeighbourPoints = self.regionQuery(file, eps)

                if len(NeighbourPoints) < MinPts:
                    print "noise"
                else:
                    name = 'Cluster' + str(self.count)
                    #modify the name to filename
                    C = cluster(name)
                    self.count += 1
                    self.expandCluster(file, NeighbourPoints, C, eps, MinPts)
Example #22
File: main.py  Project: dpfau/rl
def main(args=None):
    model = Maze(9, 9, [(4, 0), (4, 1), (4, 2), (4, 3), (4, 5), (4, 6), (4, 7), (4, 8)])
    learner = QLearner(model)

    for i in range(10):
        g = makeLocalGraph(learner)
        (nodes, v) = cluster(g)
        learner.plan(nodes, v)

        # visualization
        tab = [[(x, y) in model.walls for y in range(model.w)] for x in range(model.h)]
        for (node, val) in zip(sorted(g.nodes()), v):
            tab[node[0]][node[1]] = val#(val * val.conjugate()).real**.5
        tab += NP.min(tab)
        tab /= NP.max(tab)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.imshow(tab, cmap=cm.coolwarm, interpolation='nearest')
        plt.show()
Example #23
def returnDataBackend(df, per=.2, nf=0):
    def Mbyt(w):
        if w[-1] == 'M':
            return float(w[-2])
        elif w[-1] == 'G':
            return float(w[-2]) * 1000
        else:
            try:
                return float(w[-1])
            except:
                print w

    with open(df) as f:
        if nf > 0:
            z = [(0.0, 1, 1)] * nf
        ls = f.readlines()
        q = 0
        for i, line in enumerate(ls):
            if i == 0:
                continue
            if ':' in line:
                break
            if nf > 0 and q == nf:
                break
            w = line.split()
            tmp = float(w[-1])
            z[q] = (int(w[0]), int(w[1]), tmp)
            q = q + 1
        t = sorted(z[0:q], reverse=True, key=lambda x: x[2])
        if nf == 0:
            l = int(i * per)
            t = t[0:l]
        else:
            t = t[0:q]
    I = mySparse(t)
    #~ dat = '/Users/kshadi/Documents/Cisco_kamal/zDATA3/2016/11/16/t10.csv'
    #~ print 'SQ...'
    #~ save_sqlite(t,dat)
    #~ raw_input('database created>')
    print 'Clustering ...'
    C = cluster(t, I, '_res_1', gradient=False)
    C.cluster()
Example #24
    def __init__(self, set_name, cluster_dict, schedule_conf):
        self.job_root = "job_configs/%s" % set_name
        self.set_name = set_name
        self.schedule_conf = schedule_conf

        # statistical variable
        self.total_time = 0
        self.task_dist = []
        self.turn_on_dist = []

        self.clust = cluster(cluster_dict)

        if "online" in self.set_name:
            self.ARRIVAL_MAX = 1440
        else:
            self.ARRIVAL_MAX = 1

        self.job_set = [[] for i in range(self.ARRIVAL_MAX)
                        ]  # simulate one day of 1440 minutes
        self.load()
Example #25
    def analyze(self, time_dependent = False):
        #A: look for core points
        for pt in self.points:
            if pt.visited:
                continue
            pt.visited = True

            neighbors = self.queryRegion(pt, time_dependent)
            if(len(neighbors) < self.minPts):
                #mark noise temporarily
                self.noise.append(pt)
                pt.classification = "NOISE"
            else:
                #mark core points
                c = cluster("cluster_"+str(self.cluster_count))
                print"HI"
                self.cluster_count += 1
                pt.classification = "CORE"
                self.expandCluster(pt, neighbors, c, time_dependent)

        #B: reiterate and look for reachable points in noise
        new_noise = []
        for pt in self.noise:
            for neighbor in self.queryRegion(pt, time_dependent):
                if(neighbor.classification == "CORE"):
                    #if this point is reachable, then add to the cluster
                    pt.classification = "REACHABLE"
                    c = self.findCluster(neighbor.cluster_name)
                    c.addPoint(pt)
                    break
            if pt.classification == "NOISE":
                new_noise.append(pt)
        self.noise = new_noise

        #C:return list of centroids of clusters
        centroids = []
        for c in self.clusters:
            centroids.append(c.getCentroid())
        return centroids
Example #26
 def incrementalAdd(self, p, eps, Minpts):
     self.num = self.num + 1
     print("\nADDING point " + str(self.num))
     self.visited = []
     self.newCores = []
     UpdSeedIns = []
     foundClusters = []
     NeighbourPoints = self.regionQuery(p, eps)
     if len(NeighbourPoints) >= Minpts:
         self.newCores.append(p)
     self.visited.append(p)
     for pt in NeighbourPoints:
         if pt not in self.visited:
             self.visited.append(pt)
             np = self.regionQuery(pt, eps)
             if len(np) >= Minpts:
                 for n in np:
                     if n not in NeighbourPoints:
                         NeighbourPoints.append(n)
                 if pt not in self.curCores:
                     self.newCores.append(pt)
     for core in self.newCores:
         corehood = self.regionQuery(core, eps)
         for elem in corehood:
              if len(self.regionQuery(elem, eps)) >= Minpts:
                 if elem not in UpdSeedIns:
                     UpdSeedIns.append(elem)
     if len(UpdSeedIns) < 1:
         self.Noise.addPoint(p)
     else:
         findCount = 0
         for seed in UpdSeedIns:
             for clust in self.Clusters:
                 if clust.has(seed):
                     findCount += 1
                     if clust.name not in foundClusters:
                         foundClusters.append(clust.name)
                         break
         if len(foundClusters) == 0:
             name = 'Cluster' + str(self.count)
             C = cluster(name)
             self.count += 1
             self.expandCluster(UpdSeedIns[0],
                                self.regionQuery(UpdSeedIns[0], eps), C,
                                eps, Minpts)
         elif len(foundClusters) == 1:
             originalCluster = -1
             newCluster = -1
             for c in self.Clusters:
                 if c.name == foundClusters[0]:
                     originalCluster = c
                     newCluster = c
             newCluster.addPoint(p)
             if len(UpdSeedIns) > findCount:
                 for seed in UpdSeedIns:
                     if not newCluster.has(seed):
                         newCluster.addPoint(seed)
             self.Clusters.remove(originalCluster)
             self.Clusters.append(newCluster)
         else:
             masterCluster = -1
             originalCluster = -1
             for c in self.Clusters:
                 if c.name == foundClusters[0]:
                     masterCluster = c
                     originalCluster = c
             for clusname in foundClusters:
                 for clus in self.Clusters:
                     if clus.name == clusname:
                         for cluspoints in clus.getPoints():
                             if not masterCluster.has(cluspoints):
                                 masterCluster.addPoint(cluspoints)
             if len(UpdSeedIns) > findCount:
                 for seed in UpdSeedIns:
                     if not masterCluster.has(seed):
                         masterCluster.addPoint(seed)
             self.Clusters.remove(originalCluster)
             self.Clusters.append(masterCluster)
Example #27
def process_clusters(pIDarr,xarr,yarr,valarr,avg3arr,avg5arr,pmarr,eIDarr,etimearr,elonarr,elatarr,epAvgarr,epStdarr,eorxarr,eoryarr,eorzarr,xIDarr,xtimearr,xL1arr,xL2arr,xdrparr):
  totclarr = []
  for i in range(0, len(xarr)):
    #Get huge indexed clustered array for whole run
    totclarr.append(cluster(xarr[i], yarr[i]))
  pIDclu    = [];  pxclu    = [];  pyclu    = []
  pvalclu   = [];  pavg3clu = [];  pavg5clu = []
  pmclu     = [];  eIDclu   = [];  etimeclu = []
  elonclu   = [];  elatclu  = [];  epavgclu = []
  epstdclu  = [];  eorxclu  = [];  eoryclu  = []
  eorzclu   = [];  xIDclu   = [];  xtimeclu = []
  xL1clu    = [];  xL2clu   = [];  xdrpclu  = []
  totclucnt = []
  for i in range(0, len(totclarr)):
    #Sort out the x, y, and vals by their respective clusters now
    tpID, tpx, tpy, tpval, tpavg3, tpavg5, tpm, teID, tetime, telon, telat, tepAvg, tepStd, teorx, teory, teorz, txID, txtime, txL1, txL2, txdrp = cluster_merge(pIDarr[i],xarr[i],yarr[i],valarr[i],avg3arr[i],avg5arr[i],pmarr[i],eIDarr[i],etimearr[i],elonarr[i],elatarr[i],epAvgarr[i],epStdarr[i],eorxarr[i],eoryarr[i],eorzarr[i],xIDarr[i],xtimearr[i],xL1arr[i],xL2arr[i],xdrparr[i],totclarr[i])
    #Used for histogram
    xval = []
    yval = []
    for n in range(0, len(xarr[i])):
      for m in range(0, int(valarr[i][n])):
        xval.append(xarr[i][n])
        yval.append(yarr[i][n])  
    #If desired, can output individual event image
    #Need to manually input num and set range
    '''
    num = 712
    if i == num:
      #Create an image for each cluster in the frame
      for f in xrange(len(tmpx)):
        #Create a zoomed in image for each individual cluster
        hval, xedges, yedges = np.histogram2d(xval, yval, bins = 40, range=[[tmpx[f][0]-19,tmpx[f][0]+20],[tmpy[f][0]-19,tmpy[f][0]+20]])
        extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
        plt.figure()
        plt.imshow(hval, extent = extent, interpolation = 'nearest')
        plt.gca().invert_yaxis()
        plt.colorbar()
        plt.xlabel("pixel x")
        plt.ylabel("pixel y")
        plt.title('Pixel Value (Cluster: %s)'%(f+1))
      hval, xedges, yedges = np.histogram2d(xval, yval, bins = 350, range=[[0,350],[0,350]])
      extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
      plt.figure()
      plt.imshow(hval, extent = extent, interpolation = 'nearest')
      plt.gca().invert_yaxis()
      plt.colorbar()
      plt.xlabel("pixel x")
      plt.ylabel("pixel y")
      plt.title('Pixel Value (Frame: %s)'%(i))
      #plt.show()
    #'''
    #Make full run cluster x, y, val, and count arrays 
    #for each event
    pIDclu.append(tpID)
    pxclu.append(tpx)
    pyclu.append(tpy)
    pvalclu.append(tpval)
    pavg3clu.append(tpavg3)
    pavg5clu.append(tpavg5)
    pmclu.append(tpm)
    eIDclu.append(teID)
    etimeclu.append(tetime)
    elonclu.append(telon)
    elatclu.append(telat)
    epavgclu.append(tepAvg)
    epstdclu.append(tepStd)
    eorxclu.append(teorx)
    eoryclu.append(teory)
    eorzclu.append(teorz)
    xIDclu.append(txID)
    xtimeclu.append(txtime)
    xL1clu.append(txL1)
    xL2clu.append(txL2)
    xdrpclu.append(txdrp)
    totclucnt.append(len(tpx))
  #get all of the clusters lengths for each individual frame
  #and throw into an array of arrays
  totclulen = []
  #Find the cluster lengths for each cluster in run
  for i in xrange(len(pxclu)):
    length = cluster_length(pxclu[i], pyclu[i])
    for j in xrange(len(length)):
      totclulen.append(length[j])
  return pIDclu,pxclu,pyclu,pvalclu,pavg3clu,pavg5clu,pmclu,eIDclu,etimeclu,elonclu,elatclu,epavgclu,epstdclu,eorxclu,eoryclu,eorzclu,xIDclu,xtimeclu,xL1clu,xL2clu,xdrpclu,totclucnt,totclulen
Example #28
# import all defined functions
from cluster import *

# ============ Parameters ================

slo = False  # titles and labels in slovenian or english language
# make analysis
ks = [5, 8]  # number of clusters
logscale = [True]

# ============ Run functions using these parameters ============

print("\n\t10: clustering and analysis of clusters on logarithmic scale")
# cluster(folder=10, ks=ks, slo=slo, logscale=logscale)
print("\n\t100: clustering and analysis of clusters on logarithmic scale")
cluster(folder=100, ks=ks, slo=slo, logscale=logscale)
print("\n\t1000: clustering and analysis of clusters on logarithmic scale")
# cluster(folder=1000, ks=ks, slo=slo, logscale=logscale)
print("\n\t10000: clustering and analysis of clusters on linear scale")
cluster(folder=10000, ks=ks, slo=slo, logscale=logscale)
# size of marker ... log(N)
Example #29
def process_clusters(pIDarr, xarr, yarr, valarr, avg3arr, avg5arr, pmarr,
                     eIDarr, etimearr, elonarr, elatarr, epAvgarr, epStdarr,
                     eorxarr, eoryarr, eorzarr, xIDarr, xtimearr, xL1arr,
                     xL2arr, xdrparr):
    totclarr = []
    for i in range(0, len(xarr)):
        #Get huge indexed clustered array for whole run
        totclarr.append(cluster(xarr[i], yarr[i]))
    pIDclu = []
    pxclu = []
    pyclu = []
    pvalclu = []
    pavg3clu = []
    pavg5clu = []
    pmclu = []
    eIDclu = []
    etimeclu = []
    elonclu = []
    elatclu = []
    epavgclu = []
    epstdclu = []
    eorxclu = []
    eoryclu = []
    eorzclu = []
    xIDclu = []
    xtimeclu = []
    xL1clu = []
    xL2clu = []
    xdrpclu = []
    totclucnt = []
    for i in range(0, len(totclarr)):
        #Sort out the x, y, and vals by their respective clusters now
        tpID, tpx, tpy, tpval, tpavg3, tpavg5, tpm, teID, tetime, telon, telat, tepAvg, tepStd, teorx, teory, teorz, txID, txtime, txL1, txL2, txdrp = cluster_merge(
            pIDarr[i], xarr[i], yarr[i], valarr[i], avg3arr[i], avg5arr[i],
            pmarr[i], eIDarr[i], etimearr[i], elonarr[i], elatarr[i],
            epAvgarr[i], epStdarr[i], eorxarr[i], eoryarr[i], eorzarr[i],
            xIDarr[i], xtimearr[i], xL1arr[i], xL2arr[i], xdrparr[i],
            totclarr[i])
        #Used for histogram
        xval = []
        yval = []
        for n in range(0, len(xarr[i])):
            for m in range(0, int(valarr[i][n])):
                xval.append(xarr[i][n])
                yval.append(yarr[i][n])
        #If desired, can output individual event image
        #Need to manually input num and set range
        '''
    num = 712
    if i == num:
      #Create an image for each cluster in the frame
      for f in xrange(len(tmpx)):
        #Create a zoomed in image for each individual cluster
        hval, xedges, yedges = np.histogram2d(xval, yval, bins = 40, range=[[tmpx[f][0]-19,tmpx[f][0]+20],[tmpy[f][0]-19,tmpy[f][0]+20]])
        extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
        plt.figure()
        plt.imshow(hval, extent = extent, interpolation = 'nearest')
        plt.gca().invert_yaxis()
        plt.colorbar()
        plt.xlabel("pixel x")
        plt.ylabel("pixel y")
        plt.title('Pixel Value (Cluster: %s)'%(f+1))
      hval, xedges, yedges = np.histogram2d(xval, yval, bins = 350, range=[[0,350],[0,350]])
      extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
      plt.figure()
      plt.imshow(hval, extent = extent, interpolation = 'nearest')
      plt.gca().invert_yaxis()
      plt.colorbar()
      plt.xlabel("pixel x")
      plt.ylabel("pixel y")
      plt.title('Pixel Value (Frame: %s)'%(i))
      #plt.show()
    #'''
        #Make full run cluster x, y, val, and count arrays
        #for each event
        pIDclu.append(tpID)
        pxclu.append(tpx)
        pyclu.append(tpy)
        pvalclu.append(tpval)
        pavg3clu.append(tpavg3)
        pavg5clu.append(tpavg5)
        pmclu.append(tpm)
        eIDclu.append(teID)
        etimeclu.append(tetime)
        elonclu.append(telon)
        elatclu.append(telat)
        epavgclu.append(tepAvg)
        epstdclu.append(tepStd)
        eorxclu.append(teorx)
        eoryclu.append(teory)
        eorzclu.append(teorz)
        xIDclu.append(txID)
        xtimeclu.append(txtime)
        xL1clu.append(txL1)
        xL2clu.append(txL2)
        xdrpclu.append(txdrp)
        totclucnt.append(len(tpx))
    #get all of the clusters lengths for each individual frame
    #and throw into an array of arrays
    totclulen = []
    #Find the cluster lengths for each cluster in run
    for i in xrange(len(pxclu)):
        length = cluster_length(pxclu[i], pyclu[i])
        for j in xrange(len(length)):
            totclulen.append(length[j])
    return pIDclu, pxclu, pyclu, pvalclu, pavg3clu, pavg5clu, pmclu, eIDclu, etimeclu, elonclu, elatclu, epavgclu, epstdclu, eorxclu, eoryclu, eorzclu, xIDclu, xtimeclu, xL1clu, xL2clu, xdrpclu, totclucnt, totclulen
Example #30
        print 'Usage:', sys.argv[0], ' (0: reset 1: append) index_file video_list'
        exit(-1)
    
    IS_APPEND = int(sys.argv[1])
    index_file = sys.argv[2]
    video_list_file = sys.argv[3]
    video_list = open(video_list_file).read().splitlines()

    index = {}
    if IS_APPEND:
        with open(index_file, 'rb') as handle:
            prev_index = pickle.load(handle)
            print prev_index
            index = prev_index

    for video_name in video_list:
        video_name = video_name.split('.')[0]
        print video_name 
        if IS_APPEND:
            if video_name in index:
                continue

        
        gt_nodes = load_turker_labels(video_name)
        clusters, linkage_matrix = cluster(gt_nodes)
        index[video_name] = clusters


    with open(index_file, 'wb') as handle:
        pickle.dump(index, handle)
        
Example #31
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    np.save(f'{args.dataset}_labels', labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    cluster_interval = args.cluster_interval
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        device = features.device
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if not args.no_self_loop:
        print('add self-loop')
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # # create GCN model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = create_model(args.arch,
                         g,
                         num_layers=args.num_layers,
                         in_dim=in_feats,
                         num_hidden=args.num_hidden,
                         num_classes=n_classes,
                         heads=heads,
                         activation=F.elu,
                         feat_drop=args.in_drop,
                         attn_drop=args.attn_drop,
                         negative_slope=args.negative_slope,
                         residual=args.residual)

    print(model)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Step 1. initialization with GCN
    # init graph feat
    dur = []
    centroid_emb, hidden_emb, cluster_ids = [], [], []
    att = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # cluster
        # forward
        if epoch < args.init_feat_epoch:
            # logits = model(features)
            logits, hidden_h = model(features)
        else:
            if epoch == args.init_feat_epoch or epoch % cluster_interval == 0:
                cluster_ids_x, cluster_centers = cluster(
                    X=hidden_h.detach(),
                    num_clusters=args.cluster_number,
                    distance='cosine',
                    method=args.cluster_method
                )  # TODO: fix zero norm embedding
                centroid_emb.append(cluster_centers.detach().cpu().numpy())
                hidden_emb.append(hidden_h.detach().cpu().numpy())
                cluster_ids.append(cluster_ids_x.detach().cpu().numpy())
                pass
            logits, hidden_h = model(features, cluster_ids_x, cluster_centers,
                                     att)
            # logits, hidden_h = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        # loss.backward(retain_graph=True)
        loss.backward(retain_graph=False)
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break
        # acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          val_acc,
                                          n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
    prefix = 'embedding'
    np.save(Path(prefix, f'{args.dataset}_centroid_emb'),
            np.array(centroid_emb))
    np.save(Path(prefix, f'{args.dataset}_hidden_emb'), np.array(hidden_emb))
    np.save(Path(prefix, f'{args.dataset}_att'), np.array(att))
    np.save(Path(prefix, f'{args.dataset}_cluster_ids'), np.array(cluster_ids))
Example #32
#
# 14-Feb-14: Version 1.0: Updated
# 22-Jan-14: Version 1.0: Created
#
################################################################################
################################################################################

import sys
from cluster import *

if __name__ == "__main__":

   if (len(sys.argv) == 2): 

      _Directory = sys.argv[1]
      
      _Cluster = cluster(_Directory)
      
      _Cluster.build_dictionary()

      #print "Dictionary size: " + str(_Cluster.dictionary_size())
      
      #_Cluster.print_dictionary()

      _Cluster.build_cluster()

      #print "Unique Hashes in Dictionary " + str(_Cluster.unique_hash_count())

      #_Cluster.print_cluster()

      _Cluster._write_cluster()
Example #33
from cluster import *
tftp_one = rdpcap("final_last.pcap")
start_str = []
for t in tftp_one:
    ss = str(t)
    ss1 = ss[54:]
    if (len(ss1) > 0):
        start_str.append(ss1)
for s in start_str:
    print repr(s)
print "\r\n"
start_list = []
for s in start_str:
    nn = t_node(0, s)
    start_list.append(nn)
jihe = cluster(start_list, 5)
jihe.update_bytime(20)
print "kkk"
i = 0
file_object = open('thefile_six.txt', 'w+')
while (i < 5):
    print i
    file_object.write(repr(i))
    file_object.write("\r\n")
    print repr(jihe.cores[i].contain)
    file_object.write(repr(jihe.cores[i].contain))
    file_object.write("\r\n")
    print "clui"
    for r in jihe.clus[i]:
        file_object.write(repr(r.contain))
        file_object.write("\r\n")
    i = i + 1
Example #34
        corrRightVal, _ = randomPickRight(start, end, teX, teY, indexTableVal)
        X_left = teX[start:end].reshape([-1, 28, 28, 1]) / 255
        F_V_matrix, F_I_matrix = sess.run(
            [F_V_left, F_I_left],
            feed_dict={
                image_real_left: X_left,
                image_real_right: corrRightVal.reshape([-1, 28, 28, 1]) / 255
            })
        if start == 0:
            image_real_left_agg = X_left
            F_V_matrix_agg = F_V_matrix
            F_I_matrix_agg = F_I_matrix
        else:
            image_real_left_agg = np.concatenate((image_real_left_agg, X_left),
                                                 axis=0)
            F_V_matrix_agg = F_V_matrix = np.concatenate(
                (F_V_matrix_agg, F_V_matrix), axis=0)
            F_I_matrix_agg = F_I_matrix = np.concatenate(
                (F_I_matrix_agg, F_I_matrix), axis=0)
        iterations += 1
    cluster(image_real_left_agg,
            teY[0:len(teY)],
            F_I_matrix_agg,
            F_V_matrix_agg,
            args.pretrain_model_time_dir,
            iterations,
            is_tensorboard=False)

    # cluster(image_real_left_agg, teY[0: len(teY)], F_I_matrix_agg, F_V_matrix_agg,
    #         args.pretrain_model_time_dir, iterations)
Example #35
import sys

from utils import *
from prepare import *
from model import *
from train import *
from evaluate import *
from cluster import *

if __name__ == "__main__":
    action = sys.argv[1]  # prepare, train, eval, predict
    data_dir = sys.argv[2]  # file of train, test data
    csv_file = sys.argv[3]  # csv file of info OR output file name
    model_path = sys.argv[4]  # model path save or load OR dataloader
    # TODO: add batch_size, num_epochs as args

    if action == 'prepare':
        prepare(action, data_dir, csv_file, model_path)
    elif action == 'train':
        dataloader, data_size = prepare(action, data_dir, csv_file)
        train(model_path, dataloader, data_size)
    elif action == 'eval':
        dataloader, data_size = prepare(action, data_dir, csv_file)
        evaluate(model_path, dataloader, data_size)
    elif action == 'cluster':
        dataloader, data_size = prepare(action, data_dir, csv_file)
        cluster(model_path, dataloader, data_size)
    elif action == 'predict':
        # TODO: dataloader  = prepare(action, data_dir)
        predict(model_path, dataloader)
Example #36
 def __init__(self):
     self.Noise = cluster('Noise')
Example #37
 def incrementalDelete(self, p, eps, Minpts):
     print "\nPoint to Delete : " + str(p)
     self.newCores = []
     obsoleteCores = []
     UpdSeedDel = []
     Neighbourhood = self.regionQuery(p, eps)
     Neighbourhood.remove(p)
     self.dataSet.remove(p)
     if p in self.curCores:
         self.curCores.remove(p)
         obsoleteCores.append(p)
     for core in self.curCores:
         np = self.regionQuery(core, eps)
         if len(np) >= Minpts:
             self.newCores.append(core)
         else:
             obsoleteCores.append(core)
     for core in obsoleteCores:
         np = self.regionQuery(core, eps)
         for point in np:
             if len(self.regionQuery(point, eps)) >= Minpts and cmp(
                     point, p) != 0:
                 UpdSeedDel.append(point)
     print "\nUpdSeedDel:" + str(UpdSeedDel) + "\nCurCores:" + str(
         self.curCores) + "\nNewCores:" + str(self.newCores)
     if len(UpdSeedDel) <= 0:
         removePts = []
         for pt in Neighbourhood:
             if len(self.regionQuery(pt, eps)) < Minpts:
                 removePts.append(pt)
         for clust in self.Clusters:
             if clust.has(p):
                 clust.remPoint(p)
                 if len(clust.getPoints()) == 0:
                     self.Clusters.remove(clust)
                 else:
                     if len(Neighbourhood) == len(removePts):
                         for poin in clust.getPoints():
                             self.Noise.addPoint(poin)
                         self.Clusters.remove(clust)
                     else:
                         for poin in removePts:
                             clust.remPoint(poin)
                             self.Noise.addPoint(poin)
                 break
         if self.Noise.has(p):
             self.Noise.remPoint(p)
     else:
         directlyConnected = True
         np = self.regionQuery(UpdSeedDel[0], eps)
         for Seed in UpdSeedDel:
             if Seed not in np:
                 directlyConnected = False
         if directlyConnected:
             print "\nProcedure Reached"
             for point in Neighbourhood:
                 isNoise = True
                 neighbour = self.regionQuery(point, eps)
                 for pt in neighbour:
                     if pt in self.newCores:
                         isNoise = False
                         break
                 if isNoise:
                     print "\nFound Noise:" + str(point)
                     for clust in self.Clusters:
                         if clust.has(point):
                             clust.remPoint(point)
                             if len(clust.getPoints()) == 0:
                                 self.Clusters.remove(clust)
                             break
                     if not self.Noise.has(point):
                         self.Noise.addPoint(point)
             for clust in self.Clusters:
                 if clust.has(p):
                     clust.remPoint(p)
                     if len(clust.getPoints()) == 0:
                         self.Clusters.remove(clust)
                     break
         else:
             C = -1
             self.visited = []
             visitedSeeds = []
             newCluster = -1
             for clust in self.Clusters:
                 if clust.has(p):
                     C = clust
                     break
             if C != -1:
                 self.Clusters.remove(C)
                 for seed in UpdSeedDel:
                     neighbour = []
                     if seed not in visitedSeeds:
                         name = 'Cluster' + str(self.count)
                         self.count += 1
                         newCluster = cluster(name)
                         visitedSeeds.append(seed)
                         if seed not in self.visited:
                             self.visited.append(seed)
                         newCluster.addPoint(seed)
                         neighbour = self.regionQuery(seed, eps)
                         for pt in neighbour:
                             if pt not in self.visited:
                                 self.visited.append(pt)
                             if pt in UpdSeedDel:
                                 if pt not in visitedSeeds:
                                     visitedSeeds.append(pt)
                             np = self.regionQuery(pt, eps)
                             if len(np) >= Minpts:
                                 for poin in np:
                                     if poin not in self.visited:
                                         neighbour.append(poin)
                             if not newCluster.has(pt):
                                 newCluster.addPoint(pt)
                         if len(visitedSeeds) == len(UpdSeedDel):
                             self.Clusters.append(newCluster)
                             break
                         else:
                             self.Clusters.append(newCluster)
     self.curCores = list(self.newCores)
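Example #38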
def Query(q):
    contexts=question_analysis(q)
    sorted_d = []
    table=[]
    d2 = dict()
    d3 = dict()
    # run the query
    cl = SphinxClient()
    cl.SetServer ( host, port )
    cl.SetWeights ( [100, 1] )
    cl.SetMatchMode ( mode )
    if filtervals:
        cl.SetFilter ( filtercol, filtervals )
    if groupby:
        cl.SetGroupBy ( groupby, SPH_GROUPBY_ATTR, groupsort )
    if sortby:
        cl.SetSortMode ( SPH_SORT_EXTENDED, sortby )
    if limit:
        cl.SetLimits ( 0, limit, max(limit,1000) )
    res = cl.Query ( q, index )
    log_f = open("retrieve.log","w")
    log_c = open("candidate.log","w") 
    if not res:
        print 'query failed: %s' % cl.GetLastError()
        sys.exit(1)

    if cl.GetLastWarning():
        print 'WARNING: %s\n' % cl.GetLastWarning()

    print >> log_f, 'Query \'%s\' retrieved %d of %d matches in %s sec' % (q, res['total'], res['total_found'], res['time'])
    print >> log_f, 'Query stats:'

    if res.has_key('words'):
        for info in res['words']:
            print >> log_f, '\t\'%s\' found %d times in %d documents' % (info['word'], info['hits'], info['docs'])

    if res.has_key('matches'):
        n = 1
        print >> log_f, '\nMatches:'
        for match in res['matches']:
                filePath = d[str(match['id'])]
                tree = ET.ElementTree(file=filePath)
                match='article[@id="'+str(match['id'])+'"]'
                for elem in tree.iterfind(match):
                    print >> log_f, elem[0].text.encode('utf-8'), elem[1].text.encode('utf-8')
                    cuttest(elem[0].text.encode('utf-8').strip(),d2,d3,n)
                    lines = elem[1].text.encode('utf-8').strip().splitlines()
                
                for line in lines:
                    sents = line.strip().split('。')
                    for sen in sents:
                        cuttest(sen.strip(),d2,d3,n)    
                n += 1
        

        # sort candidate words by frequency, descending
        sorted_d = sorted(d2.iteritems(),key=operator.itemgetter(1),reverse=True)
        
        #test = open('test.txt','w')
        i=1
        for w in sorted_d:
            if i>600:
                break
            name = w[0].encode('utf-8')
            c = cluster(name)
            tf = max(d3[w[0]].values())
            s = len(d3[w[0]])
            tfidf = tf*log(n-1/s)
            prob=1
            for y in contexts:
                if len(y)>5:
                    print q,name,y[0],y[1],y[2],y[3],y[4],y[5],probability_clusterInContext_givenCluster(c,y)
                else:
                    print q,name,y[0],y[1],y[2],probability_clusterInContext_givenCluster(c,y)
                prob*=probability_clusterInContext_givenCluster(c,y)
            if prob==1:
                prob=0
            print >> log_c, name,'\t','1:'+str(w[1]),'\t','2:'+str(tf),'\t','3:'+str(prob)
            if prob>0:
                table.append([w[0],'1:'+str(w[1]),'2:'+str(tf),'3:'+str(prob)])
            i+=1
    else:
        print >> log_c, "no result"
    
    return table