def runScript(bestRankersFile, frK, tK):  # "bestRanker.p" sys.argv[1]
    """Cluster the rankers from ``bestRankersFile`` and pickle the mappings.

    The data returned by ``getData(bestRankersFile)`` is clustered with
    ``get_best_clusters``. A fresh ``clusterData`` object is filled with two
    dictionaries and pickled to ``ClusterData/<name>.data``, where ``<name>``
    is the last '/'-separated component of ``bestRankersFile`` up to its
    first dot.

    Args:
        bestRankersFile: path handed to ``getData``; also used to derive the
            output file name.
        frK: lower cluster-count bound, converted with ``int``.
        tK: upper cluster-count bound, converted with ``int`` and incremented
            by one before being passed on (presumably because
            ``get_best_clusters`` treats it as exclusive -- TODO confirm).

    Returns:
        The tuple ``(queryToCluster, clusterToRanker)`` taken from the
        pickled ``clusterData`` object.
    """
    fromK = int(frK)
    toK = int(tK) + 1

    bestKClusterGroup1 = get_best_clusters(getData(bestRankersFile), fromK, toK)

    # Every ranker exposes .tolist() (e.g. a numpy array); store plain lists
    # so they can be compared with == and pickled without numpy objects.
    bestKClusterGroup2 = [[ranker.tolist() for ranker in cluster]
                          for cluster in bestKClusterGroup1]

    clusterDataObject = clusterData()

    # dict[clusterNumber:int] = list of all rankers (each ranker is a list)
    for clusterNumber, rankers in enumerate(bestKClusterGroup2):
        clusterDataObject.clusterToRanker[clusterNumber] = rankers
        print("%s %s" % (type(rankers), len(rankers)))

    # queryRankerDict is read from module scope. Convert its rankers to lists
    # once up front instead of once per (cluster, ranker) pair.
    queryRankers = []
    for query in queryRankerDict:
        value = queryRankerDict[query]
        if isinstance(value, list):
            queryRankers.append(
                (query, True, [ranker.tolist() for ranker in value]))
        else:
            queryRankers.append((query, False, value.tolist()))

    # dict[queryID] = cluster numbers of the clusters holding its rankers
    queryToCluster = clusterDataObject.queryToCluster
    for clusterNumber in clusterDataObject.clusterToRanker:
        for ranker in clusterDataObject.clusterToRanker[clusterNumber]:
            for query, hasManyRankers, converted in queryRankers:
                if hasManyRankers:
                    for candidate in converted:
                        if candidate == ranker:
                            queryToCluster.setdefault(query, []).append(
                                clusterNumber)
                elif converted == ranker:
                    # NOTE(review): a query with a single (non-list) ranker
                    # gets a bare int here, not a one-element list; kept
                    # as-is because callers may rely on it.
                    queryToCluster[query] = clusterNumber

    if not os.path.exists("ClusterData"):
        os.makedirs("ClusterData")

    name = bestRankersFile.split('/')[-1].split('.')[0]

    # Use a context manager so the output file is flushed and closed even if
    # pickling fails.
    with open("ClusterData/" + name + ".data", "wb") as outFile:
        pickle.dump(clusterDataObject, outFile)

    return clusterDataObject.queryToCluster, clusterDataObject.clusterToRanker
def runScript(self):  # "bestRanker.p" sys.argv[1]
    """Cluster this object's rankers and pickle the resulting mappings.

    The data from ``self.getData()`` is clustered with
    ``self.get_best_clusters``. A fresh ``clusterData`` object is filled with
    two dictionaries and pickled to
    ``ClusterData/<self.dataset><self.iterations>.data``.

    Side effects:
        Sets ``self.bestKClusterGroup1`` (raw clustering result) and
        ``self.bestKClusterGroup2`` (same structure with each ranker
        converted to a plain list), prints one line per cluster, and creates
        the ``ClusterData`` directory if it is missing.

    Returns:
        The tuple ``(queryToCluster, clusterToRanker)`` taken from the
        pickled ``clusterData`` object.
    """
    data = self.getData()
    self.bestKClusterGroup1 = self.get_best_clusters(data)

    # Every ranker exposes .tolist() (e.g. a numpy array); store plain lists
    # so they can be compared with == below.
    self.bestKClusterGroup2 = [[ranker.tolist() for ranker in cluster]
                               for cluster in self.bestKClusterGroup1]

    clusterDataObject = clusterData()

    # dict[clusterNumber:int] = list of all rankers (each ranker is a list)
    for clusterNumber, rankers in enumerate(self.bestKClusterGroup2):
        clusterDataObject.clusterToRanker[clusterNumber] = rankers
        print("%s %s" % (type(rankers), len(rankers)))

    # Convert the per-query rankers to lists once, rather than once for
    # every (cluster, ranker) pair in the loops below.
    queryRankers = []
    for query in self.queryRankerDict:
        value = self.queryRankerDict[query]
        if isinstance(value, list):
            queryRankers.append(
                (query, True, [ranker.tolist() for ranker in value]))
        else:
            queryRankers.append((query, False, value.tolist()))

    # dict[queryID] = cluster numbers of the clusters holding its rankers
    queryToCluster = clusterDataObject.queryToCluster
    for clusterNumber in clusterDataObject.clusterToRanker:
        for ranker in clusterDataObject.clusterToRanker[clusterNumber]:
            for query, hasManyRankers, converted in queryRankers:
                if hasManyRankers:
                    for candidate in converted:
                        if candidate == ranker:
                            queryToCluster.setdefault(query, []).append(
                                clusterNumber)
                elif converted == ranker:
                    # NOTE(review): a query with a single (non-list) ranker
                    # gets a bare int here, not a one-element list; kept
                    # as-is because callers may rely on it.
                    queryToCluster[query] = clusterNumber

    if not os.path.exists("ClusterData"):
        os.makedirs("ClusterData")

    # Use a context manager so the output file is flushed and closed even if
    # pickling fails.
    outPath = "ClusterData/" + self.dataset + str(self.iterations) + '.data'
    with open(outPath, "wb") as outFile:
        pickle.dump(clusterDataObject, outFile)

    return clusterDataObject.queryToCluster, clusterDataObject.clusterToRanker
def runScript(self):  # "bestRanker.p" sys.argv[1]
    """Assign rankers to clusters, map queries to clusters, pickle the result.

    ``self.getClusters(data)`` is expected to yield one cluster label per
    entry of ``self.getData()`` (labels and data are paired by position).
    A fresh ``clusterData`` object is filled with two dictionaries and
    pickled to ``ClusterData/<self.dataset>.data``.

    Side effects:
        Prints every query with its cluster list and every cluster with its
        size; creates the ``ClusterData`` directory if it is missing.

    Returns:
        The tuple ``(queryToCluster, clusterToRanker)`` taken from the
        pickled ``clusterData`` object.
    """
    data = self.getData()
    dataToClusters = list(self.getClusters(data))
    clusterDataObject = clusterData()
    data = list(data)

    # dict[clusterLabel] = list of all rankers (each ranker is a list)
    clusterToRanker = clusterDataObject.clusterToRanker
    for position, label in enumerate(dataToClusters):
        clusterToRanker.setdefault(label, []).append(list(data[position]))

    # Convert the per-query rankers to lists once, rather than once for
    # every ranker of every cluster in the loops below.
    queryRankers = [(query, [list(ranker) for ranker in rankers])
                    for query, rankers in self.queryRankerDict.items()]

    # dict[queryID] = list of labels of the clusters holding its rankers
    queryToCluster = clusterDataObject.queryToCluster
    for label in clusterToRanker:  # for each cluster
        for clusterRanker in clusterToRanker[label]:  # each ranker in it
            for query, rankers in queryRankers:  # for each query
                for queryRanker in rankers:  # each ranker of that query
                    if queryRanker == clusterRanker:
                        queryToCluster.setdefault(query, []).append(label)

    for query in queryToCluster:
        clusters = queryToCluster[query]
        print("%s %s %s" % (query, len(clusters), clusters))

    for label in clusterToRanker:
        print("%s %s" % (label, len(clusterToRanker[label])))

    if not os.path.exists("ClusterData"):
        os.makedirs("ClusterData")

    # Use a context manager so the output file is flushed and closed even if
    # pickling fails.
    with open("ClusterData/" + self.dataset + ".data", "wb") as outFile:
        pickle.dump(clusterDataObject, outFile)

    return clusterDataObject.queryToCluster, clusterDataObject.clusterToRanker
def runScript(self):  # "bestRanker.p" sys.argv[1]
    """Group rankers by cluster label, link queries to clusters, and pickle.

    The labels produced by ``self.getClusters(data)`` are paired by position
    with the entries of ``self.getData()``. Both mappings are stored on a new
    ``clusterData`` object, which is written to
    ``ClusterData/<self.dataset>.data``.

    Side effects:
        Prints each query with the number and list of its clusters, then
        each cluster with its ranker count; creates ``ClusterData`` if it
        does not exist.

    Returns:
        ``(queryToCluster, clusterToRanker)`` from the ``clusterData`` object.
    """
    data = self.getData()
    dataToClusters = list(self.getClusters(data))
    clusterDataObject = clusterData()
    data = list(data)

    # dict[clusterLabel] = list of all rankers (each ranker is a list)
    clusterToRanker = clusterDataObject.clusterToRanker
    for position, label in enumerate(dataToClusters):
        clusterToRanker.setdefault(label, []).append(list(data[position]))

    # The list form of each query ranker does not depend on the cluster
    # being inspected, so build it once up front.
    queryRankers = [(query, [list(ranker) for ranker in rankers])
                    for query, rankers in self.queryRankerDict.items()]

    # dict[queryID] = list of labels of the clusters holding its rankers
    queryToCluster = clusterDataObject.queryToCluster
    for label in clusterToRanker:  # for each cluster
        for clusterRanker in clusterToRanker[label]:  # each ranker in it
            for query, rankers in queryRankers:  # for each query
                for queryRanker in rankers:  # each ranker of that query
                    if queryRanker == clusterRanker:
                        queryToCluster.setdefault(query, []).append(label)

    for query in queryToCluster:
        clusters = queryToCluster[query]
        print("%s %s %s" % (query, len(clusters), clusters))

    for label in clusterToRanker:
        print("%s %s" % (label, len(clusterToRanker[label])))

    if not os.path.exists("ClusterData"):
        os.makedirs("ClusterData")

    # Close the output file deterministically instead of leaving the handle
    # to the garbage collector.
    with open("ClusterData/" + self.dataset + ".data", "wb") as outFile:
        pickle.dump(clusterDataObject, outFile)

    return clusterDataObject.queryToCluster, clusterDataObject.clusterToRanker
def runScript(bestRankersFile, frK, tK):  # "bestRanker.p" sys.argv[1]
    """Cluster the rankers from ``bestRankersFile`` and pickle the mappings.

    ``getData(bestRankersFile)`` supplies the data that
    ``get_best_clusters`` groups into clusters. The cluster-to-ranker and
    query-to-cluster dictionaries are stored on a new ``clusterData`` object,
    which is written to ``ClusterData/<name>.data``; ``<name>`` is the last
    '/'-separated component of ``bestRankersFile`` up to its first dot.

    Args:
        bestRankersFile: path handed to ``getData``; also used to derive the
            output file name.
        frK: lower cluster-count bound, converted with ``int``.
        tK: upper cluster-count bound, converted with ``int`` and incremented
            by one before being passed on (presumably because
            ``get_best_clusters`` treats it as exclusive -- TODO confirm).

    Returns:
        ``(queryToCluster, clusterToRanker)`` from the ``clusterData`` object.
    """
    fromK = int(frK)
    toK = int(tK) + 1

    bestKClusterGroup1 = get_best_clusters(getData(bestRankersFile), fromK, toK)

    # Every ranker exposes .tolist() (e.g. a numpy array); keep plain lists
    # so they can be compared with == below.
    bestKClusterGroup2 = [[ranker.tolist() for ranker in cluster]
                          for cluster in bestKClusterGroup1]

    clusterDataObject = clusterData()

    # dict[clusterNumber:int] = list of all rankers (each ranker is a list)
    for clusterNumber, rankers in enumerate(bestKClusterGroup2):
        clusterDataObject.clusterToRanker[clusterNumber] = rankers
        print("%s %s" % (type(rankers), len(rankers)))

    # queryRankerDict is read from module scope. Its rankers are converted
    # to lists a single time here rather than inside the nested loops.
    queryRankers = []
    for query in queryRankerDict:
        value = queryRankerDict[query]
        if isinstance(value, list):
            queryRankers.append(
                (query, True, [ranker.tolist() for ranker in value]))
        else:
            queryRankers.append((query, False, value.tolist()))

    # dict[queryID] = cluster numbers of the clusters holding its rankers
    queryToCluster = clusterDataObject.queryToCluster
    for clusterNumber in clusterDataObject.clusterToRanker:
        for ranker in clusterDataObject.clusterToRanker[clusterNumber]:
            for query, hasManyRankers, converted in queryRankers:
                if hasManyRankers:
                    for candidate in converted:
                        if candidate == ranker:
                            queryToCluster.setdefault(query, []).append(
                                clusterNumber)
                elif converted == ranker:
                    # NOTE(review): a query with a single (non-list) ranker
                    # gets a bare int here, not a one-element list; kept
                    # as-is because callers may rely on it.
                    queryToCluster[query] = clusterNumber

    if not os.path.exists("ClusterData"):
        os.makedirs("ClusterData")

    name = bestRankersFile.split('/')[-1].split('.')[0]

    # Close the output file deterministically instead of leaving the handle
    # to the garbage collector.
    with open("ClusterData/" + name + ".data", "wb") as outFile:
        pickle.dump(clusterDataObject, outFile)

    return clusterDataObject.queryToCluster, clusterDataObject.clusterToRanker
def runScript(self):  # "bestRanker.p" sys.argv[1]
    """Cluster this object's rankers and pickle the resulting mappings.

    ``self.getData()`` supplies the data that ``self.get_best_clusters``
    groups into clusters. The cluster-to-ranker and query-to-cluster
    dictionaries are stored on a new ``clusterData`` object, which is written
    to ``ClusterData/<self.dataset><self.iterations>.data``.

    Side effects:
        Sets ``self.bestKClusterGroup1`` (raw clustering result) and
        ``self.bestKClusterGroup2`` (same structure with each ranker
        converted to a plain list), prints one line per cluster, and creates
        the ``ClusterData`` directory if it is missing.

    Returns:
        ``(queryToCluster, clusterToRanker)`` from the ``clusterData`` object.
    """
    data = self.getData()
    self.bestKClusterGroup1 = self.get_best_clusters(data)

    # Every ranker exposes .tolist() (e.g. a numpy array); keep plain lists
    # so they can be compared with == below.
    self.bestKClusterGroup2 = [[ranker.tolist() for ranker in cluster]
                               for cluster in self.bestKClusterGroup1]

    clusterDataObject = clusterData()

    # dict[clusterNumber:int] = list of all rankers (each ranker is a list)
    for clusterNumber, rankers in enumerate(self.bestKClusterGroup2):
        clusterDataObject.clusterToRanker[clusterNumber] = rankers
        print("%s %s" % (type(rankers), len(rankers)))

    # The list form of each query ranker does not depend on the cluster
    # being inspected, so build it once up front.
    queryRankers = []
    for query in self.queryRankerDict:
        value = self.queryRankerDict[query]
        if isinstance(value, list):
            queryRankers.append(
                (query, True, [ranker.tolist() for ranker in value]))
        else:
            queryRankers.append((query, False, value.tolist()))

    # dict[queryID] = cluster numbers of the clusters holding its rankers
    queryToCluster = clusterDataObject.queryToCluster
    for clusterNumber in clusterDataObject.clusterToRanker:
        for ranker in clusterDataObject.clusterToRanker[clusterNumber]:
            for query, hasManyRankers, converted in queryRankers:
                if hasManyRankers:
                    for candidate in converted:
                        if candidate == ranker:
                            queryToCluster.setdefault(query, []).append(
                                clusterNumber)
                elif converted == ranker:
                    # NOTE(review): a query with a single (non-list) ranker
                    # gets a bare int here, not a one-element list; kept
                    # as-is because callers may rely on it.
                    queryToCluster[query] = clusterNumber

    if not os.path.exists("ClusterData"):
        os.makedirs("ClusterData")

    # Close the output file deterministically instead of leaving the handle
    # to the garbage collector.
    outPath = "ClusterData/" + self.dataset + str(self.iterations) + '.data'
    with open(outPath, "wb") as outFile:
        pickle.dump(clusterDataObject, outFile)

    return clusterDataObject.queryToCluster, clusterDataObject.clusterToRanker