def Validate(self, firstTimeSpan, secondTimeSpan):
    self.m = svm_load_model("./temp data/Model1970_1979")
    y, x = self.GenerateYAndX(firstTimeSpan, secondTimeSpan)
    p_label, p_acc, p_val = svm_predict(y, x, self.m)
    nodesDict = InitialData.InitialNodesPairWeightDict(secondTimeSpan)
    print "len(nodesDict) = ", len(nodesDict)
    nodesList = InitialData.InitialNodesList(firstTimeSpan)
    print "len(nodesList) = ", len(nodesList)
    # Count the links in the second time span whose endpoints both already
    # exist in the first time span: these are the predictable new links.
    linksAppeared = 0
    for i in nodesDict:
        if i in nodesList:
            for j in nodesDict[i]:
                if j in nodesList:
                    linksAppeared += 1
    truePositive = 0
    falsePositive = 0
    print "linksAppeared = ", linksAppeared
    yfile = open("./temp data/y", "w")
    for i in y:
        yfile.write(str(i) + "\n")
    yfile.close()
    wfile = open("./temp data/p_label", "w")
    for i in p_label:
        wfile.write(str(i) + "\n")
    wfile.close()
def GenerateXAndYForTesting(self):
    nodesDict = InitialData.InitialNodesPairWeightDict(
        self.firstTestingStartTime, self.firstTestingEndTime)
    nodesDictLabel = InitialData.InitialNodesPairWeightDict(
        self.secondTestingStartTime, self.secondTestingEndTime)
    firstTimeSpan = StringProcessing.GetTimeSpan(
        self.firstTestingStartTime, self.firstTestingEndTime)
    vectorsDictFile = open("./temp data/Vectors" + firstTimeSpan)
    vectorsDict = pickle.load(vectorsDictFile)
    vectorsDictFile.close()
    row = []
    col = []
    data = []
    y = []
    rowId = 0
    components = ConnectedComponents.ReadAllConnectedComponentsFromFile(
        self.firstTestingStartTime, self.firstTestingEndTime)
    for component in components:
        for i in component:
            for j in component:
                # Only score node pairs that are not already linked in the
                # first time span.
                if i != j and (i not in nodesDict or j not in nodesDict[i]):
                    if i in vectorsDict and j in vectorsDict[i]:
                        for k in vectorsDict[i][j]:
                            row.append(rowId)
                            col.append(k)
                            data.append(vectorsDict[i][j][k])
                        if i in nodesDictLabel and j in nodesDictLabel[i]:
                            y.append(1)
                        else:
                            y.append(-1)
                    else:
                        # No feature vector for this pair: emit a dummy
                        # feature so the row is not empty.
                        row.append(rowId)
                        col.append(0)
                        data.append(1)
                        y.append(-1)
                    rowId += 1
    # Append one dummy row at the maximum column index so the sparse matrix
    # always has the same width as the training matrix.
    row.append(rowId)
    col.append(4194300)
    data.append(1)
    y.append(-1)
    x = csr_matrix((data, (row, col)))
    return x, np.array(y)
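# The dummy entry at column 4194300 above pins the width of the CSR matrix;
# without it, csr_matrix infers the width from the largest feature index that
# happens to occur, so training and testing matrices could disagree. A small
# hedged demonstration (4194300 is taken from the code above; whether it is
# the true feature-space size is an assumption):
from scipy.sparse import csr_matrix

small = csr_matrix(([1.0], ([0], [5])))
print small.shape            # (1, 6): width inferred from the data
padded = csr_matrix(([1.0, 1.0], ([0, 1], [5, 4194300])))
print padded.shape           # (2, 4194301): width pinned by the dummy entry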
def WriteAllConnectedComponentsToFile(startTime, endTime):
    timeSpan = StringProcessing.GetTimeSpan(startTime, endTime)
    nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(
        startTime, endTime)
    connectedComponents = FindAllConnectedComponents(nodesPairWeightDict)
    connectedComponentsFile = open(
        "./temp data/ConnectedComponents" + timeSpan, "w")
    pickle.dump(connectedComponents, connectedComponentsFile)
    connectedComponentsFile.close()
def ReadAllConnectedComponentsFromFile(timeSpan):
    # The existence check must target the same file that is opened below.
    fileName = "./temp data/ConnectedComponents_" + timeSpan
    if os.path.exists(fileName):
        connectedComponentsFile = open(fileName, "r")
        connectedComponents = pickle.load(connectedComponentsFile)
        connectedComponentsFile.close()
        return connectedComponents
    else:
        # Fall back to recomputing the components when no cached file exists.
        nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(timeSpan)
        connectedComponents = FindAllConnectedComponents(nodesPairWeightDict)
        return connectedComponents
def ReadAllConnectedComponentsFromFile(startTime, endTime):
    timeSpan = StringProcessing.GetTimeSpan(startTime, endTime)
    # The existence check must target the same file that is opened below.
    fileName = "./temp data/ConnectedComponents" + timeSpan
    if os.path.exists(fileName):
        connectedComponentsFile = open(fileName, "r")
        connectedComponents = pickle.load(connectedComponentsFile)
        connectedComponentsFile.close()
        return connectedComponents
    else:
        # Fall back to recomputing the components when no cached file exists.
        nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(
            startTime, endTime)
        connectedComponents = FindAllConnectedComponents(nodesPairWeightDict)
        return connectedComponents
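# FindAllConnectedComponents is called above but not defined in this section.
# A minimal sketch of what it presumably does, assuming nodesPairWeightDict
# maps node -> {neighbor: weight} (a symmetric adjacency dict): an iterative
# traversal that returns each connected component as a list of nodes.
def FindAllConnectedComponents(nodesPairWeightDict):
    visited = set()
    components = []
    for start in nodesPairWeightDict:
        if start in visited:
            continue
        component = []
        queue = [start]
        visited.add(start)
        while queue:
            node = queue.pop()
            component.append(node)
            for neighbor in nodesPairWeightDict.get(node, {}):
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append(neighbor)
        components.append(component)
    return components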
def Validate(self, firstTimeSpan, secondTimeSpan):
    #self.m = svm_load_model("./temp data/Model1970_1979")
    y, x = self.GenerateYAndX(firstTimeSpan, secondTimeSpan)
    p_label, p_acc, p_val = svm_predict(y, x, self.m)
    nodesDict = InitialData.InitialNodesPairWeightDict(secondTimeSpan)
    print "len(nodesDict) = ", len(nodesDict)
    nodesList = InitialData.InitialNodesList(firstTimeSpan)
    print "len(nodesList) = ", len(nodesList)
    # Count the links in the second time span whose endpoints both already
    # exist in the first time span; recall is measured against this count.
    linksAppeared = 0
    for i in nodesDict:
        if i in nodesList:
            for j in nodesDict[i]:
                if j in nodesList:
                    linksAppeared += 1
    truePositive = 0
    falsePositive = 0
    print "linksAppeared = ", linksAppeared
    yfile = open("./temp data/y", "w")
    for i in y:
        yfile.write(str(i) + "\n")
    yfile.close()
    wfile = open("./temp data/p_label", "w")
    for i in p_label:
        wfile.write(str(i) + "\n")
    wfile.close()
    for i in xrange(0, len(p_label)):
        if p_label[i] == 1:
            if y[i] == 1:
                truePositive += 1
            else:
                falsePositive += 1
    print "truePositive = ", truePositive, "falsePositive = ", falsePositive
    precision = float(truePositive) / (truePositive + falsePositive)
    recall = float(truePositive) / linksAppeared
    print "precision = ", precision, "recall = ", recall
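# The model loaded by Validate is trained elsewhere; a minimal training
# sketch using the libsvm Python interface, assuming GenerateYAndX returns
# labels and instances in libsvm's expected format. The kernel and cost
# options shown are illustrative, not the original training parameters.
from svmutil import svm_train, svm_save_model

def Train(self, firstTimeSpan, secondTimeSpan):
    y, x = self.GenerateYAndX(firstTimeSpan, secondTimeSpan)
    # '-t 0': linear kernel; '-c 1': cost parameter (assumed values).
    self.m = svm_train(y, x, '-t 0 -c 1')
    svm_save_model("./temp data/Model" + firstTimeSpan, self.m)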
def GenerateYAndX(self, firstTimeSpan, secondTimeSpan):
    nodesDict = InitialData.InitialNodesPairWeightDict(secondTimeSpan)
    vectorsDictFile = open("./temp data/Vectors" + firstTimeSpan)
    vectorsDict = pickle.load(vectorsDictFile)
    vectorsDictFile.close()
    # y: a Python list/tuple of l labels (type must be int/double).
    y = []
    # x: a Python list/tuple of l data instances. Each element of x must be
    # an instance of list/tuple/dictionary type.
    x = []
    for i in vectorsDict:
        for j in vectorsDict[i]:
            x.append(vectorsDict[i][j])
            print("i: %s, j: %s" % (i, j))
            if i in nodesDict and j in nodesDict[i]:
                y.append(1)
            else:
                y.append(-1)
    return y, x
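# A small hedged illustration of the instance format GenerateYAndX produces:
# each vectorsDict[i][j] is a sparse {feature_index: count} dict, which
# libsvm accepts directly as a data instance (the values below are made up).
example_x = [{3: 2, 17: 1}, {0: 1}]   # two instances with sparse features
example_y = [1, -1]                   # +1: the link appeared, -1: it did not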
def OverlappingCommunityDetection(startTime, endTime):
    timeSpan = StringProcessing.GetTimeSpan(startTime, endTime)
    delimiter = " "
    fullFileName = ("../../../data/facebook-wosn-wall/edges" + timeSpan +
                    ".data")
    nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(
        startTime, endTime)
    # Build the {(i, j): weight} map that HLC expects for weighted
    # single-linkage link clustering.
    ij2wij = {}
    for i in nodesPairWeightDict:
        for j in nodesPairWeightDict[i]:
            ij2wij[str(i), str(j)] = nodesPairWeightDict[i][j]
    adj, edges = read_edgelist_unweighted(fullFileName, delimiter=delimiter)
    edge2cid, S_max, D_max, list_D = HLC(adj, edges).single_linkage(w=ij2wij)
    write_edge2cid(edge2cid, timeSpan, delimiter=delimiter)
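# HLC assigns each edge a community id; a hedged sketch of turning that edge
# partition into overlapping node communities (a node belongs to every
# community one of its edges falls in). edge2cid is assumed to map
# (node, node) -> community id, as the HLC call above suggests.
def EdgePartitionToNodeCommunities(edge2cid):
    cid2nodes = {}
    for (i, j), cid in edge2cid.items():
        cid2nodes.setdefault(cid, set()).update([i, j])
    return cid2nodes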
def GenerateVector(paths, nodesDict):
    pathSet = set()
    vector = {}
    for path in paths:
        hashValue = HashPath(path)
        if hashValue not in pathSet:
            pathSet.add(hashValue)
            pathDict = CompleteGroup(path, nodesDict)
            # Map the path's endpoints to indices 1 and 2 and interior nodes
            # to 3, 4, ... so isomorphic subgraphs get comparable matrices.
            nodeMapping = {path[0]: 1, path[-1]: 2}
            for i in xrange(1, len(path) - 1):
                nodeMapping[path[i]] = i + 2
            adjacencyMatrix = InitialData.InitialMatrix()
            for i in pathDict:
                for j in pathDict[i]:
                    adjacencyMatrix[nodeMapping[i]][nodeMapping[j]] = 1
                    adjacencyMatrix[nodeMapping[j]][nodeMapping[i]] = 1
            minAddress = CalSubgraphAddress.ArgMinAddress(adjacencyMatrix)
            vector[minAddress] = vector.get(minAddress, 0) + 1
    return vector
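# HashPath is used above but not defined here; a minimal sketch under the
# assumption that a path and its reverse should hash identically, so each
# undirected path is counted only once.
def HashPath(path):
    forward = tuple(path)
    backward = tuple(reversed(path))
    return min(forward, backward)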
def CommunityDetection(timeSpan):
    G = nx.Graph()
    nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(timeSpan)
    for i in nodesPairWeightDict:
        for j in nodesPairWeightDict[i]:
            G.add_edge(i, j, weight=nodesPairWeightDict[i][j])
    # Louvain partition from the python-louvain package: node -> community id.
    partition = community.best_partition(G)
    partitionDict = {}
    for i in partition:
        if partition[i] not in partitionDict:
            partitionDict[partition[i]] = []
        partitionDict[partition[i]].append(i)
    partitionSets = partitionDict.values()
    partitionSetsFile = open("./temp data/PartitionSets" + timeSpan, "w")
    pickle.dump(partitionSets, partitionSetsFile)
    partitionSetsFile.close()
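# A hedged usage sketch: read the pickled partition back and inspect
# community sizes. The "1970_1979" time span is illustrative, taken from
# other file names in this code.
partitionSetsFile = open("./temp data/PartitionSets1970_1979")
partitionSets = pickle.load(partitionSetsFile)
partitionSetsFile.close()
for nodes in partitionSets:
    print "community size = ", len(nodes)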
def GenerateVector(paths, nodesDict, addressDict):
    pathSet = set()
    vector = {}
    for path in paths:
        hashValue = HashPath(path)
        if hashValue not in pathSet:
            pathSet.add(hashValue)
            pathDict = CompleteGroup(path, nodesDict)
            nodeMapping = {path[0]: 1, path[-1]: 2}
            for i in xrange(1, len(path) - 1):
                nodeMapping[path[i]] = i + 2
            adjacencyMatrix = InitialData.InitialMatrix()
            for i in pathDict:
                for j in pathDict[i]:
                    adjacencyMatrix[nodeMapping[i]][nodeMapping[j]] = 1
                    #adjacencyMatrix[nodeMapping[j]][nodeMapping[i]] = 1
            # Look up the canonical address of this subgraph and count it as
            # one feature occurrence.
            adjacencyMatrixAddress = Address().GetAddress(adjacencyMatrix)
            subAddress = addressDict[adjacencyMatrixAddress]
            vector[subAddress] = vector.get(subAddress, 0) + 1
    return vector
def GenerateXAndYForTraining(self):
    nodesDict = InitialData.InitialNodesPairWeightDict(
        self.secondTrainingStartTime, self.secondTrainingEndTime)
    firstTimeSpan = StringProcessing.GetTimeSpan(
        self.firstTrainingStartTime, self.firstTrainingEndTime)
    vectorsDictFile = open("./temp data/Vectors" + firstTimeSpan)
    vectorsDict = pickle.load(vectorsDictFile)
    vectorsDictFile.close()
    row = []
    col = []
    data = []
    y = []
    # Dummy first row with a single feature at column 0.
    row.append(0)
    col.append(0)
    data.append(1)
    y.append(-1)
    rowId = 1
    for i in vectorsDict:
        for j in vectorsDict[i]:
            for k in vectorsDict[i][j]:
                row.append(rowId)
                col.append(k)
                data.append(vectorsDict[i][j][k])
            if i in nodesDict and j in nodesDict[i]:
                y.append(1)
            else:
                y.append(-1)
            rowId += 1
    # Dummy last row at the maximum column index so the sparse matrix always
    # has the same width as the testing matrix.
    row.append(rowId)
    col.append(4194300)
    data.append(1)
    y.append(-1)
    x = csr_matrix((data, (row, col)))
    return x, np.array(y)
def WriteAllConnectedComponentsToFile(timeSpan):
    nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(timeSpan)
    connectedComponents = FindAllConnectedComponents(nodesPairWeightDict)
    connectedComponentsFile = open(
        "./temp data/ConnectedComponents_" + timeSpan, "w")
    pickle.dump(connectedComponents, connectedComponentsFile)
    connectedComponentsFile.close()
import datetime
from datetime import date
from datetime import timedelta
import calendar
import numpy as np

from InitialData import *
from Aircraft import *
from AircraftDatabase import *
from Workshop import *
from HangarDatabase import *
from Maintenance import *
from MaintenanceDatabase import *
from Schedule import *
from ScehduleDatabase import *

initial_data = InitialData()
initial_data.LoadAllFile()  # Load the data --> in MIP Model & MIP Model Sample

# Loading data: InitialData.py
# Preprocessing
#   Aircraft data: Aircraft.py, AircraftDatabase.py
#   Hangar data: HangarDatabase.py, Workshop.py
#   Scheduling data: Schedule.py
# Define sets and indexes
import InitialData
import pickle

# ###merge 9####
# filesNames, fullFilesNames = InitialData.FileWalker("./temp data/Vectors1970_1985/9")
# vectorsDict = {}
# for fullFileName in fullFilesNames:
#     vectorDictFile = open(fullFileName)
#     vectorDict = pickle.load(vectorDictFile)
#     for i in vectorDict:
#         if i not in vectorsDict:
#             vectorsDict[i] = {}
#         vectorsDict[i].update(vectorDict[i])
# vectorsDictFile = open("./temp data/Vectors1970_1985/Vectors_9", "w")
# pickle.dump(vectorsDict, vectorsDictFile)

# Merge all partial vector dicts into a single pickle file.
vectorsDict = {}
filesNames, fullFilesNames = InitialData.FileWalker(
    "./temp data/Vectors1970_1985_bak")
for fullFileName in fullFilesNames:
    print fullFileName
    vectorDictFile = open(fullFileName)
    vectorsDict.update(pickle.load(vectorDictFile))
    vectorDictFile.close()
vectorsDictFile = open("./temp data/Vectors1970_1985", "w")
pickle.dump(vectorsDict, vectorsDictFile)
vectorsDictFile.close()
def CommunityDetection(timeSpan):
    # Sample graph for testing:
    #nodesPairWeightDict = {0:{2:1, 4:1, 5:1, 3:1}, 1:{2:1, 4:1, 7:1}, 2:{0:1, 1:1, 4:1, 5:1, 6:1}, 3:{0:1, 7:1}, 4:{0:1, 1:1, 2:1, 10:1}, 5:{0:1, 2:1, 7:1, 11:1}, 6:{2:1, 7:1, 11:1}, 7:{1:1, 3:1, 5:1, 6:1}, 8:{15:1, 14:1, 10:1, 9:1, 11:1}, 9:{8:1, 14:1, 12:1}, 10:{12:1, 14:1, 4:1, 8:1, 11:1, 13:1}, 11:{13:1, 10:1, 8:1, 5:1, 6:1}, 12:{9:1, 10:1}, 13:{10:1, 11:1}, 14:{8:1, 9:1, 10:1}, 15:{8:1}}
    threshold = 0.000001
    connectedComponents = ConnectedComponents.ReadAllConnectedComponentsFromFile(
        timeSpan)
    allNodesPairWeightDict = InitialData.InitialNodesPairWeightDict(timeSpan)
    partitionSets = []
    # Run the Louvain method independently on each connected component.
    for connectedComponent in connectedComponents:
        print "connectedComponent size = ", len(connectedComponent)
        nodesPairWeightDict = {}
        for i in connectedComponent:
            nodesPairWeightDict[i] = allNodesPairWeightDict[i]
        nodesPartitionDict = {}
        for i in nodesPairWeightDict:
            nodesPartitionDict[i] = set()
            nodesPartitionDict[i].add(i)
        c = {}
        for i in nodesPairWeightDict:
            c[i] = i
        while True:
            (m, nodesWeightDict) = GetNodesWeight(nodesPairWeightDict)
            qGlobal = GetModularity(nodesWeightDict, nodesPairWeightDict, m, c)
            # Phase 1: move each node to the neighboring community with the
            # largest modularity gain until no move improves modularity.
            while True:
                isChanged = False
                for i in nodesPairWeightDict:
                    gainMax = 0.0
                    cMax = None
                    for j in nodesPairWeightDict[i]:
                        qGain = GetModularityGain(
                            nodesWeightDict, nodesPairWeightDict, m, c, i, c[j])
                        if qGain > gainMax:
                            gainMax = qGain
                            cMax = c[j]
                    if cMax is not None and c[i] != cMax:
                        c[i] = cMax
                        isChanged = True
                if not isChanged:
                    break
            # Phase 2: collapse each community into a single node, summing
            # the edge weights between communities.
            newNodesPairWeightDict = {}
            for i in nodesPairWeightDict.keys():
                if c[i] not in newNodesPairWeightDict:
                    newNodesPairWeightDict[c[i]] = {}
                for j in nodesPairWeightDict[i].keys():
                    newNodesPairWeightDict[c[i]][c[j]] = \
                        newNodesPairWeightDict[c[i]].get(c[j], 0) + \
                        nodesPairWeightDict[i][j]
            nodesPairWeightDict = newNodesPairWeightDict
            (m, nodesWeightDict) = GetNodesWeight(nodesPairWeightDict)
            # Track which original nodes each aggregated node represents.
            newNodesPartitionDict = {}
            for i in c:
                if c[i] not in newNodesPartitionDict:
                    newNodesPartitionDict[c[i]] = []
                newNodesPartitionDict[c[i]] = list(
                    set(newNodesPartitionDict[c[i]]).union(
                        set(nodesPartitionDict[i])))
            nodesPartitionDict = newNodesPartitionDict
            c = {}
            for i in nodesPairWeightDict:
                c[i] = i
            qNew = GetModularity(nodesWeightDict, nodesPairWeightDict, m, c)
            # Stop when the modularity improvement falls below the threshold.
            if qNew - qGlobal < threshold:
                break
            qGlobal = qNew
        partitionSets += nodesPartitionDict.values()
    partitionSetsFile = open("./temp data/PartitionSets" + timeSpan, "w")
    pickle.dump(partitionSets, partitionSetsFile)
    partitionSetsFile.close()
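# GetNodesWeight, GetModularity, and GetModularityGain are used above but not
# defined in this section; they presumably implement the standard Louvain
# quantities (Q = sum over communities of in_c/(2m) - (tot_c/(2m))^2). A
# minimal sketch of GetNodesWeight under the assumed contract: return the
# total edge weight m and each node's weighted degree. Every edge appears in
# both directions of the adjacency dict, hence the division by two.
def GetNodesWeight(nodesPairWeightDict):
    nodesWeightDict = {}
    m = 0.0
    for i in nodesPairWeightDict:
        nodesWeightDict[i] = 0.0
        for j in nodesPairWeightDict[i]:
            nodesWeightDict[i] += nodesPairWeightDict[i][j]
            m += nodesPairWeightDict[i][j]
    return (m / 2.0, nodesWeightDict)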
minutetable = 'stock_minute'
finnhub_client = finnhub.Client(api_key="bv4f2qn48v6qpatdiu3g")

# Create the daily candle table and backfill it on first run.
if updatestockdata.check_Table('stock_daily') is False:
    sqlcommand = "CREATE TABLE IF NOT EXISTS " + dailytable + " (" \
                 "symbol VARCHAR(50) NOT NULL, " \
                 "close FLOAT NOT NULL, " \
                 "high FLOAT NOT NULL, " \
                 "low FLOAT NOT NULL, " \
                 "open FLOAT NOT NULL, " \
                 "time INT NOT NULL, " \
                 "volume FLOAT NOT NULL) "
    util.execute_sql(sqlcommand)
    initialdata.initial_stockdata('sec_list_1000.csv', 'D', 979527600,
                                  1610582400, dailytable)
    logging.info('Create the database table ' + dailytable)

# Create the minute candle table and backfill it on first run.
if updatestockdata.check_Table('stock_minute') is False:
    sqlcommand = "CREATE TABLE IF NOT EXISTS " + minutetable + " (" \
                 "symbol VARCHAR(50) NOT NULL, " \
                 "close FLOAT NOT NULL, " \
                 "high FLOAT NOT NULL, " \
                 "low FLOAT NOT NULL, " \
                 "open FLOAT NOT NULL, " \
                 "time INT NOT NULL, " \
                 "volume FLOAT NOT NULL) "
    util.execute_sql(sqlcommand)
    initialdata.initial_stockdata('sec_list_1000.csv', '1', 979527600,
                                  1610582400, minutetable)
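# initial_stockdata is defined elsewhere; a hedged sketch of what it
# presumably does, using the finnhub-python stock_candles endpoint. The
# symbol list file, resolution ('D' or '1'), UNIX time range, and table name
# come from the calls above; the insert logic and column order are assumed.
import csv

def initial_stockdata_sketch(symbol_file, resolution, start, end, table):
    with open(symbol_file) as f:
        for row in csv.reader(f):
            symbol = row[0]
            candles = finnhub_client.stock_candles(symbol, resolution,
                                                   start, end)
            if candles.get('s') != 'ok':
                continue  # no data returned for this symbol
            for c, h, l, o, t, v in zip(candles['c'], candles['h'],
                                        candles['l'], candles['o'],
                                        candles['t'], candles['v']):
                util.execute_sql(
                    "INSERT INTO " + table +
                    " (symbol, close, high, low, open, time, volume)"
                    " VALUES ('%s', %f, %f, %f, %f, %d, %f)"
                    % (symbol, c, h, l, o, t, v))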
import InitialData
import pickle

# ###merge 9####
# filesNames, fullFilesNames = InitialData.FileWalker("./temp data/8_9/")
# vectorsDict = {}
# for fullFileName in fullFilesNames:
#     vectorDictFile = open(fullFileName)
#     vectorDict = pickle.load(vectorDictFile)
#     for i in vectorDict:
#         if i not in vectorsDict:
#             vectorsDict[i] = {}
#         vectorsDict[i].update(vectorDict[i])
# vectorsDictFile = open("./temp data/Vectors2004_2006_8_9", "w")
# pickle.dump(vectorsDict, vectorsDictFile)

# Merge all partial vector dicts into a single pickle file.
vectorsDict = {}
filesNames, fullFilesNames = InitialData.FileWalker(
    "./temp data/Vectors2004_2006_parts")
for fullFileName in fullFilesNames:
    print fullFileName
    vectorDictFile = open(fullFileName)
    vectorsDict.update(pickle.load(vectorDictFile))
    vectorDictFile.close()
vectorsDictFile = open("./temp data/Vectors2004_2006_part1", "w")
pickle.dump(vectorsDict, vectorsDictFile)
vectorsDictFile.close()
def GenerateRank(self, partitionMethod="ConnectedComponents"):
    nodesDict = InitialData.InitialNodesPairWeightDict(self.firstStartTime,
                                                       self.firstEndTime)
    nodesDictLabel = InitialData.InitialNodesPairWeightDict(
        self.secondStartTime, self.secondEndTime)
    if partitionMethod == "ConnectedComponents":
        connectedComponents = ConnectedComponents.ReadAllConnectedComponentsFromFile(
            self.firstStartTime, self.firstEndTime)
    elif partitionMethod == "CommunityDetection":
        connectedComponents = CommunityDetection.ReadCommunitiesFromFile(
            self.firstStartTime, self.firstEndTime)
    else:
        connectedComponents = LinkClustering.ReadAllConnectedComponentsFromFile(
            self.firstStartTime, self.firstEndTime)
    pairDict = {}
    y_true = []
    y_score = []
    for nodes in connectedComponents:
        print("Components size = %d" % len(nodes))
        G = nx.Graph()
        componentNodesDict = {}
        for i in nodes:
            if i not in componentNodesDict:
                componentNodesDict[i] = []
            for j in nodesDict[i]:
                if j in nodes:
                    componentNodesDict[i].append(j)
                    G.add_edge(i, j)
        for i in xrange(0, len(nodes)):
            startTime = time.time()
            for j in xrange(i + 1, len(nodes)):
                if nodes[j] not in componentNodesDict[nodes[i]]:
                    if nodes[i] not in pairDict:
                        pairDict[nodes[i]] = {}
                    pairDict[nodes[i]][nodes[j]] = 1
                    # Common-neighbors score: the number of neighbors shared
                    # by the two endpoints.
                    intersect = set(nodesDict[nodes[i]]) & set(nodesDict[nodes[j]])
                    y_score.append(len(intersect))
                    if nodes[i] in nodesDictLabel and nodes[j] in nodesDictLabel[nodes[i]]:
                        y_true.append(1)
                    else:
                        y_true.append(-1)
            endTime = time.time()
            print("nodes[%s] generated, finished in %f s" % (nodes[i],
                                                             endTime - startTime))
    # Pairs not scored above still need a label; give them score 0.
    components = ConnectedComponents.ReadAllConnectedComponentsFromFile(
        self.firstStartTime, self.firstEndTime)
    for component in components:
        for i in component:
            for j in component:
                if i != j and (i not in pairDict or j not in pairDict[i]):
                    if i not in nodesDict or j not in nodesDict[i]:
                        if i in nodesDictLabel and j in nodesDictLabel[i]:
                            y_true.append(1)
                        else:
                            y_true.append(-1)
                        y_score.append(0)
    return y_true, y_score
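# A hedged usage sketch: the common-neighbors scores returned by GenerateRank
# can be summarized with ROC AUC via scikit-learn (a dependency assumed here,
# not shown in the original); roc_auc_score accepts the -1/+1 labels and
# treats +1 as the positive class. `ranker` is an assumed instance of the
# class that defines GenerateRank.
from sklearn.metrics import roc_auc_score

y_true, y_score = ranker.GenerateRank(partitionMethod="ConnectedComponents")
print "ROC AUC = ", roc_auc_score(y_true, y_score)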