Exemplo n.º 1
0
    def Validate(self, firstTimeSpan, secondTimeSpan):
        self.m = svm_load_model("./temp data/Model1970_1979")
        y, x = self.GenerateYAndX(firstTimeSpan, secondTimeSpan)
        p_label, p_acc, p_val = svm_predict(y, x, self.m)
        nodesDict = InitialData.InitialNodesPairWeightDict(secondTimeSpan)
        print "len(nodesDict) = ", len(nodesDict)
        nodesList = InitialData.InitialNodesList(firstTimeSpan)
        print "len(nodesList) = ", len(nodesList)
        linksAppeared = 0
        for i in nodesDict:
            if i in nodesList:
                for j in nodesDict[i]:
                    if j in nodesList:
                        linksAppeared += 1
        truePositive = 0
        falsePositve = 0

        print "linksAppeared = ", linksAppeared

        yfile = open("./temp data/y", "w")
        for i in y:
            yfile.write(str(i) + "\n")

        wfile = open("./temp data/p_label", "w")
        for i in p_label:
            wfile.write(str(i) + "\n")
    def GenerateXAndYForTesting(self):
        """Build the sparse test matrix x and label vector y.

        Candidate pairs are unlinked node pairs inside each connected
        component of the first testing span; features come from the pickled
        vectors of that span, labels from links present in the second span.

        Returns:
            (x, y): scipy.sparse.csr_matrix of features and numpy array of
            +1/-1 labels.
        """
        nodesDict = InitialData.InitialNodesPairWeightDict(
            self.firstTestingStartTime, self.firstTestingEndTime)
        nodesDictLabel = InitialData.InitialNodesPairWeightDict(
            self.secondTestingStartTime, self.secondTestingEndTime)
        firstTimeSpan = StringProcessing.GetTimeSpan(
            self.firstTestingStartTime, self.firstTestingEndTime)
        # Close the pickle file instead of leaking the handle.
        with open("./temp data/Vectors" + firstTimeSpan) as vectorsDictFile:
            vectorsDict = pickle.load(vectorsDictFile)

        row = []
        col = []
        data = []
        y = []

        rowId = 0
        components = ConnectedComponents.ReadAllConnectedComponentsFromFile(
            self.firstTestingStartTime, self.firstTestingEndTime)
        for component in components:
            for i in component:
                for j in component:
                    if i == j:
                        continue
                    # Only pairs NOT already linked in the first span are
                    # prediction candidates (simplified from the redundant
                    # "not in ... or (in ... and j not in ...)" test).
                    if i in nodesDict and j in nodesDict[i]:
                        continue
                    if i in vectorsDict and j in vectorsDict[i]:
                        for k in vectorsDict[i][j]:
                            row.append(rowId)
                            col.append(k)
                            data.append(vectorsDict[i][j][k])
                        if i in nodesDictLabel and j in nodesDictLabel[i]:
                            y.append(1)
                        else:
                            y.append(-1)
                    else:
                        # No feature vector available: emit a dummy feature
                        # in column 0 with a negative label.
                        row.append(rowId)
                        col.append(0)
                        data.append(1)
                        y.append(-1)
                    rowId += 1

        # Sentinel row at column 4194300 pins the matrix to the full
        # feature width the model expects.
        row.append(rowId)
        col.append(4194300)
        data.append(1)
        y.append(-1)
        x = csr_matrix((data, (row, col)))
        return x, np.array(y)
Exemplo n.º 3
0
def WriteAllConnectedComponentsToFile(startTime, endTime):
    """Compute the connected components for [startTime, endTime] and pickle
    them to "./temp data/ConnectedComponents<timeSpan>"."""
    timeSpan = StringProcessing.GetTimeSpan(startTime, endTime)
    pairWeights = InitialData.InitialNodesPairWeightDict(startTime, endTime)
    components = FindAllConnectedComponents(pairWeights)
    outputFile = open("./temp data/ConnectedComponents" + timeSpan, "w")
    pickle.dump(components, outputFile)
    outputFile.close()
Exemplo n.º 4
0
def ReadAllConnectedComponentsFromFile(timeSpan):
	"""Load cached connected components for timeSpan, or recompute them.

	Bug fix: the existence check previously probed
	sys.path[0] + "\\ConnectedComponents_..." while the pickle was opened
	from "./temp data/" -- now the same path is checked and opened.
	"""
	cachePath = "./temp data/ConnectedComponents_" + timeSpan
	if os.path.exists(cachePath):
		connectedComponentsFile = open(cachePath, "r")
		try:
			return pickle.load(connectedComponentsFile)
		finally:
			# Close the handle even if unpickling fails.
			connectedComponentsFile.close()
	# No cache on disk: recompute from the pair-weight dict.
	nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(timeSpan)
	return FindAllConnectedComponents(nodesPairWeightDict)
Exemplo n.º 5
0
def ReadAllConnectedComponentsFromFile(startTime, endTime):
    """Load cached connected components for the span, or recompute them.

    Bug fix: the existence check previously probed
    sys.path[0] + "\\ConnectedComponents..." while the pickle was opened
    from "./temp data/" -- now the same path is checked and opened.
    """
    timeSpan = StringProcessing.GetTimeSpan(startTime, endTime)
    cachePath = "./temp data/ConnectedComponents" + timeSpan
    if os.path.exists(cachePath):
        connectedComponentsFile = open(cachePath, "r")
        try:
            return pickle.load(connectedComponentsFile)
        finally:
            # Close the handle even if unpickling fails.
            connectedComponentsFile.close()
    # No cache on disk: recompute from the pair-weight dict.
    nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(
        startTime, endTime)
    return FindAllConnectedComponents(nodesPairWeightDict)
    def Validate(self, firstTimeSpan, secondTimeSpan):
        #self.m = svm_load_model("./temp data/Model1970_1979")
        y, x = self.GenerateYAndX(firstTimeSpan, secondTimeSpan)
        p_label, p_acc, p_val = svm_predict(y, x, self.m)
        nodesDict = InitialData.InitialNodesPairWeightDict(secondTimeSpan)
        print "len(nodesDict) = ", len(nodesDict)
        nodesList = InitialData.InitialNodesList(firstTimeSpan)
        print "len(nodesList) = ", len(nodesList)
        linksAppeared = 0
        for i in nodesDict:
            if i in nodesList:
                for j in nodesDict[i]:
                    if j in nodesList:
                        linksAppeared += 1
        truePositive = 0
        falsePositve = 0

        print "linksAppeared = ", linksAppeared

        yfile = open("./temp data/y", "w")
        for i in y:
            yfile.write(str(i) + "\n")

        wfile = open("./temp data/p_label", "w")
        for i in p_label:
            wfile.write(str(i) + "\n")

        for i in xrange(0, len(p_label)):
            if p_label[i] == 1:
                if y[i] == 1:
                    truePositive += 1
                else:
                    falsePositve += 1

        print "truePositive = ", truePositive, "falsePositve = ", falsePositve

        precision = float(truePositive) / (truePositive + falsePositve)
        recall = float(truePositive) / linksAppeared
        print "precision = ", precision, "recall = ", recall
Exemplo n.º 7
0
 def GenerateYAndX(self, firstTimeSpan, secondTimeSpan):
     """Load the pickled feature vectors for firstTimeSpan and label each
     node pair by whether it is linked in secondTimeSpan.

     Returns:
         (y, x): y is a list of +1/-1 int labels, x a list of dict feature
         vectors -- the formats expected by libsvm's svm_predict.
     """
     nodesDict = InitialData.InitialNodesPairWeightDict(secondTimeSpan)
     # Close the pickle file instead of leaking the handle.
     with open("./temp data/Vectors" + firstTimeSpan) as vectorsDictFile:
         vectorsDict = pickle.load(vectorsDictFile)
     y = []
     x = []
     for i in vectorsDict:
         for j in vectorsDict[i]:
             x.append(vectorsDict[i][j])
             print("i: %s, j: %s" % (i, j))
             if i in nodesDict and j in nodesDict[i]:
                 y.append(1)
             else:
                 y.append(-1)
     return y, x
Exemplo n.º 8
0
def OverlappingCommunityDetection(startTime, endTime):
    """Run HLC link clustering over the weighted edge list for the span.

    Builds an (i, j) -> weight map from the project's pair-weight dict,
    reads the matching edge-list file, and writes the edge -> community-id
    assignment produced by single-linkage HLC.
    """
    timeSpan = StringProcessing.GetTimeSpan(startTime, endTime)
    delimiter = " "
    fullFileName = "../../../data/facebook-wosn-wall/edges" + timeSpan + ".data"

    nodesPairWeightDict = InitialData.InitialNodesPairWeightDict(
        startTime, endTime)
    # HLC keys its weight map by string node-id pairs.
    ij2wij = {}
    for i in nodesPairWeightDict:
        for j in nodesPairWeightDict[i]:
            ij2wij[str(i), str(j)] = nodesPairWeightDict[i][j]

    # (Dropped an unused `basename` local from the original.)
    adj, edges = read_edgelist_unweighted(fullFileName, delimiter=delimiter)

    edge2cid, S_max, D_max, list_D = HLC(adj, edges).single_linkage(w=ij2wij)
    write_edge2cid(edge2cid, timeSpan, delimiter=delimiter)
Exemplo n.º 9
0
def GenerateVector(paths, nodesDict):
    """Count canonical subgraph addresses over the deduplicated paths.

    Each unseen path is expanded to its induced group, renumbered so the
    endpoints map to 1 and 2, turned into a symmetric adjacency matrix,
    and counted under its minimal canonical address.

    Returns:
        dict mapping canonical address -> occurrence count.
    """
    pathSet = set()
    vector = {}
    for path in paths:
        # Hash once per path -- the original called HashPath twice; this
        # also matches the other GenerateVector variant in this file.
        hashValue = HashPath(path)
        if hashValue in pathSet:
            continue
        pathSet.add(hashValue)
        pathDict = CompleteGroup(path, nodesDict)
        # Endpoints map to 1 and 2; interior nodes are numbered 3 onward.
        nodeMapping = {path[0]: 1, path[-1]: 2}
        for i in xrange(1, len(path) - 1):
            nodeMapping[path[i]] = i + 2
        adjacencyMatrix = InitialData.InitialMatrix()
        for i in pathDict:
            for j in pathDict[i]:
                # Undirected: set both directions.
                adjacencyMatrix[nodeMapping[i]][nodeMapping[j]] = 1
                adjacencyMatrix[nodeMapping[j]][nodeMapping[i]] = 1
        minAddress = CalSubgraphAddress.ArgMinAddress(adjacencyMatrix)
        vector[minAddress] = vector.get(minAddress, 0) + 1
    return vector
def CommunityDetection(timeSpan):
    """Detect communities with python-louvain over the weighted graph of
    timeSpan and pickle the per-community node lists."""
    graph = nx.Graph()
    pairWeights = InitialData.InitialNodesPairWeightDict(timeSpan)
    for src in pairWeights:
        for dst in pairWeights[src]:
            graph.add_edge(src, dst, weight=pairWeights[src][dst])
    partition = community.best_partition(graph)

    # Group nodes by the community id assigned to them.
    partitionDict = {}
    for node in partition:
        partitionDict.setdefault(partition[node], []).append(node)

    partitionSets = partitionDict.values()
    partitionSetsFile = open("./temp data/PartitionSets" + timeSpan, "w")
    pickle.dump(partitionSets, partitionSetsFile)
    partitionSetsFile.close()
Exemplo n.º 11
0
def GenerateVector(paths, nodesDict, addressDict):
    """Count canonical sub-addresses over the deduplicated paths.

    Each unseen path is expanded to its induced group, renumbered so the
    endpoints map to 1 and 2, encoded as a DIRECTED adjacency matrix, and
    counted under addressDict's canonical sub-address for that matrix.
    """
    seenHashes = set()
    vector = {}
    for path in paths:
        digest = HashPath(path)
        if digest in seenHashes:
            continue
        seenHashes.add(digest)
        groupDict = CompleteGroup(path, nodesDict)
        # Endpoints map to 1 and 2; interior nodes are numbered 3 onward.
        nodeMapping = {path[0]: 1, path[-1]: 2}
        for idx in xrange(1, len(path) - 1):
            nodeMapping[path[idx]] = idx + 2
        matrix = InitialData.InitialMatrix()
        # Only the (src, dst) direction is set -- unlike the other
        # GenerateVector variant, the matrix here is directed.
        for src in groupDict:
            for dst in groupDict[src]:
                matrix[nodeMapping[src]][nodeMapping[dst]] = 1
        fullAddress = Address().GetAddress(matrix)
        subAddress = addressDict[fullAddress]
        vector[subAddress] = vector.get(subAddress, 0) + 1
    return vector
    def GenerateXAndYForTraining(self):
        """Build the sparse training matrix x and label vector y.

        Features come from the pickled vectors of the first training span;
        labels are +1 when the pair is linked in the second training span,
        otherwise -1.  Dummy rows pin the matrix to the full feature width.

        Returns:
            (x, y): scipy.sparse.csr_matrix and numpy array of +1/-1 labels.
        """
        nodesDict = InitialData.InitialNodesPairWeightDict(
            self.secondTrainingStartTime, self.secondTrainingEndTime)
        firstTimeSpan = StringProcessing.GetTimeSpan(
            self.firstTrainingStartTime, self.firstTrainingEndTime)
        # Close the pickle file instead of leaking the handle.
        with open("./temp data/Vectors" + firstTimeSpan) as vectorsDictFile:
            vectorsDict = pickle.load(vectorsDictFile)

        row = []
        col = []
        data = []
        y = []

        # Dummy first row keeps column 0 populated.
        row.append(0)
        col.append(0)
        data.append(1)
        y.append(-1)

        rowId = 1
        for i in vectorsDict:
            for j in vectorsDict[i]:
                for k in vectorsDict[i][j]:
                    row.append(rowId)
                    col.append(k)
                    data.append(vectorsDict[i][j][k])
                if i in nodesDict and j in nodesDict[i]:
                    y.append(1)
                else:
                    y.append(-1)
                rowId += 1

        # Sentinel last row at column 4194300 forces the full matrix width.
        row.append(rowId)
        col.append(4194300)
        data.append(1)
        y.append(-1)

        x = csr_matrix((data, (row, col)))
        return x, np.array(y)
Exemplo n.º 13
0
def WriteAllConnectedComponentsToFile(timeSpan):
	"""Find all connected components for timeSpan and pickle them to
	"./temp data/ConnectedComponents_<timeSpan>"."""
	pairWeights = InitialData.InitialNodesPairWeightDict(timeSpan)
	components = FindAllConnectedComponents(pairWeights)
	outputFile = open("./temp data/ConnectedComponents_" + timeSpan, "w")
	pickle.dump(components, outputFile)
	outputFile.close()
Exemplo n.º 14
0
# Entry script: import every model/data module and load the input files.
# NOTE(review): the star imports make the namespace hard to trace --
# consider explicit imports.
import datetime
from datetime import date
from datetime import timedelta
import calendar
import numpy as np
from InitialData import *
from Aircraft import *
from AircraftDatabase import *
from Workshop import *
from HangarDatabase import *
from Maintenance import *
from MaintenanceDatabase import *
from Schedule import *
from ScehduleDatabase import *  # NOTE(review): module name looks misspelled ("Scehdule") -- confirm

# Load all input files up front before any modeling work.
initial_data = InitialData()
initial_data.LoadAllFile()


#Load the data --> in MIP Model & MIP Model Sampel
#Loading data: InitialData.py


#Preprocessing 
#Aircraft Data: Aircraft.py, AircraftDatabase.py
#Hangar Data: HangarDatabase.py, Workshop.py
#Scheduling Data: Schedule.py


#Define sets and indexes
#Define index
Exemplo n.º 15
0
import InitialData
import pickle

# ###merge 9####
# #filesNames, fullFilesNames = InitialData.FileWalker("./temp data/Vectors1970_1985/9")
# vectorsDict = {}
# for fullFileName in fullFilesNames:
# 	vectorDictFile = open(fullFileName)
# 	vectorDict = pickle.load(vectorDictFile)
# 	for i in vectorDict:
# 		if i not in vectorsDict:
# 			vectorsDict[i] = {}
# 		vectorsDict[i].update(vectorDict[i])
# vectorsDictFile = open("./temp data/Vectors1970_1985/Vectors_9", "w")
# pickle.dump(vectorsDict, vectorsDictFile)

vectorsDict = {}
filesNames, fullFilesNames = InitialData.FileWalker(
    "./temp data/Vectors1970_1985_bak")
for fullFileName in fullFilesNames:
    print fullFileName
    vectorDictFile = open(fullFileName)
    vectorsDict.update(pickle.load(vectorDictFile))

vectorsDictFile = open("./temp data/Vectors1970_1985", "w")
pickle.dump(vectorsDict, vectorsDictFile)
Exemplo n.º 16
0
def CommunityDetection(timeSpan):
    """Louvain-style community detection, run per connected component.

    For each connected component: repeatedly (1) greedily move each node
    to the neighbouring community with the largest modularity gain, then
    (2) collapse communities into super-nodes, until one outer pass
    improves global modularity by less than ``threshold``.  The resulting
    node partitions are pickled to "./temp data/PartitionSets<timeSpan>".
    """
    #nodesPairWeightDict = {0:{2:1, 4:1, 5:1, 3:1}, 1:{2:1, 4:1, 7:1}, 2:{0:1, 1:1, 4:1, 5:1, 6:1}, 3:{0:1, 7:1}, 4:{0:1, 1:1, 2:1, 10:1}, 5:{0:1, 2:1, 7:1, 11:1}, 6:{2:1, 7:1, 11:1}, 7:{1:1, 3:1, 5:1, 6:1}, 8:{15:1, 14:1, 10:1, 9:1, 11:1}, 9:{8:1, 14:1, 12:1}, 10:{12:1, 14:1, 4:1, 8:1, 11:1, 13:1}, 11:{13:1, 10:1, 8:1, 5:1, 6:1}, 12:{9:1, 10:1}, 13:{10:1, 11:1}, 14:{8:1, 9:1, 10:1}, 15:{8:1}}
    # Stop the outer loop once the modularity gain drops below this.
    threshold = 0.000001
    connectedComponents = ConnectedComponents.ReadAllConnectedComponentsFromFile(
        timeSpan)
    allNodesPairWeightDict = InitialData.InitialNodesPairWeightDict(timeSpan)
    partitionSets = []
    for connectedComponent in connectedComponents:
        print "connectedComponent size = ", len(connectedComponent)

        # Restrict the weighted adjacency dict to this component only.
        nodesPairWeightDict = {}
        for i in connectedComponent:
            nodesPairWeightDict[i] = allNodesPairWeightDict[i]

        # nodesPartitionDict[i]: the set of ORIGINAL nodes currently folded
        # into super-node i; starts as singletons.
        nodesPartitionDict = {}
        for i in nodesPairWeightDict:
            nodesPartitionDict[i] = set()
            nodesPartitionDict[i].add(i)

        # c[i]: community id of node i; initially each node is its own.
        c = {}
        for i in nodesPairWeightDict:
            c[i] = i

        while True:
            (m, nodesWeightDict) = GetNodesWeight(nodesPairWeightDict)
            qGlobal = GetModularity(nodesWeightDict, nodesPairWeightDict, m, c)
            # Phase 1: local moves until no single-node move improves
            # modularity any further.
            while True:
                isChanged = False
                for i in nodesPairWeightDict:
                    gainMax = 0.0
                    cMax = None
                    for j in nodesPairWeightDict[i]:
                        qGain = GetModularityGain(nodesWeightDict,
                                                  nodesPairWeightDict, m, c, i,
                                                  c[j])
                        if qGain > gainMax:
                            gainMax = qGain
                            cMax = c[j]
                    if cMax != None:
                        if c[i] != cMax:
                            c[i] = cMax
                            isChanged = True
                if not isChanged:
                    break

            # Phase 2: collapse each community into a super-node, summing
            # edge weights between (and within) communities.
            newNodesPairWeightDict = {}
            for i in nodesPairWeightDict.keys():
                if c[i] not in newNodesPairWeightDict:
                    newNodesPairWeightDict[c[i]] = {}
                for j in nodesPairWeightDict[i].keys():
                    newNodesPairWeightDict[
                        c[i]][c[j]] = newNodesPairWeightDict[c[i]].get(
                            c[j], 0) + nodesPairWeightDict[i][j]
            nodesPairWeightDict = newNodesPairWeightDict
            (m, nodesWeightDict) = GetNodesWeight(nodesPairWeightDict)

            # Merge the original-node sets of nodes that now share a
            # community, keyed by the new super-node id.
            newNodesPartitionDict = {}
            for i in c:
                if c[i] not in newNodesPartitionDict:
                    newNodesPartitionDict[c[i]] = []
                newNodesPartitionDict[c[i]] = list(
                    set(newNodesPartitionDict[c[i]]).union(
                        set(nodesPartitionDict[i])))
            nodesPartitionDict = newNodesPartitionDict

            # Reset community ids for the collapsed graph.
            c = {}
            for i in nodesPairWeightDict:
                c[i] = i

            qNew = GetModularity(nodesWeightDict, nodesPairWeightDict, m, c)
            if qNew - qGlobal < threshold:
                break
            qGlobal = qNew
        partitionSets += nodesPartitionDict.values()
    partitionSetsFile = open("./temp data/PartitionSets" + timeSpan, "w")
    pickle.dump(partitionSets, partitionSetsFile)
    partitionSetsFile.close()
Exemplo n.º 17
0
# Bootstrap script: create the daily/minute stock tables if they are
# missing and backfill them from Finnhub candles over the epoch range
# 979527600..1610582400.
# NOTE(review): `dailytable`, `updatestockdata`, `util`, `initialdata`
# and `logging` are defined/imported outside this chunk -- confirm.
# NOTE(review): the Finnhub API key is hard-coded; move it to config/env.
minutetable = 'stock_minute'
finnhub_client = finnhub.Client(api_key="bv4f2qn48v6qpatdiu3g")

#print("Here is a")
# NOTE(review): checks the literal 'stock_daily' but builds the SQL from
# `dailytable` -- confirm the two always match.
if updatestockdata.check_Table('stock_daily') is False:
    sqlcommand = "CREATE TABLE IF NOT EXISTS " + dailytable + " (" \
                                                              "symbol   VARCHAR(50) NOT NULL, " \
                                                              "close    FLOAT NOT NULL, " \
                                                              "high     FLOAT NOT NULL, " \
                                                              "low      FLOAT NOT NULL, " \
                                                              "open     FLOAT NOT NULL, " \
                                                              "time     INT NOT NULL, " \
                                                              "volume   FLOAT NOT NULL) "

    util.execute_sql(sqlcommand)
    # 'D' = daily resolution candles.
    initialdata.initial_stockdata('sec_list_1000.csv', 'D', 979527600,
                                  1610582400, dailytable)
    logging.info('Create the database table ' + dailytable)

if updatestockdata.check_Table('stock_minute') is False:
    sqlcommand = "CREATE TABLE IF NOT EXISTS " + minutetable + " (" \
                                                               "symbol   VARCHAR(50) NOT NULL, " \
                                                               "close    FLOAT NOT NULL, " \
                                                               "high     FLOAT NOT NULL, " \
                                                               "low      FLOAT NOT NULL, " \
                                                               "open     FLOAT NOT NULL, " \
                                                               "time     INT NOT NULL, " \
                                                               "volume   FLOAT NOT NULL) "

    util.execute_sql(sqlcommand)
    # '1' = one-minute resolution candles.
    initialdata.initial_stockdata('sec_list_1000.csv', '1', 979527600,
                                  1610582400, minutetable)
Exemplo n.º 18
0
import InitialData
import pickle

# # ###merge 9####
# filesNames, fullFilesNames = InitialData.FileWalker("./temp data/8_9/")
# vectorsDict = {}
# for fullFileName in fullFilesNames:
# 	vectorDictFile = open(fullFileName)
# 	vectorDict = pickle.load(vectorDictFile)
# 	for i in vectorDict:
# 		if i not in vectorsDict:
# 			vectorsDict[i] = {}
# 		vectorsDict[i].update(vectorDict[i])
# vectorsDictFile = open("./temp data/Vectors2004_2006_8_9", "w")
# pickle.dump(vectorsDict, vectorsDictFile)

vectorsDict = {}
filesNames, fullFilesNames = InitialData.FileWalker(
    "./temp data/Vectors2004_2006_parts")
for fullFileName in fullFilesNames:
    print fullFileName
    vectorDictFile = open(fullFileName)
    vectorsDict.update(pickle.load(vectorDictFile))

vectorsDictFile = open("./temp data/Vectors2004_2006_part1", "w")
pickle.dump(vectorsDict, vectorsDictFile)
Exemplo n.º 19
0
	def GenerateRank(self, partitionMethod = "ConnectedComponents"):
		"""Score every unlinked node pair by its common-neighbour count.

		Partitions the first-span graph (by connected components, Louvain
		communities, or link clustering per ``partitionMethod``), then for
		each unlinked pair inside a partition appends the size of the
		neighbour intersection to y_score and +1/-1 to y_true depending on
		whether the pair is linked in the second span.  Pairs missed by the
		partition loop are swept up afterwards with score 0.

		Returns:
			(y_true, y_score): parallel lists for ranking evaluation.
		"""
		timeSpan = StringProcessing.GetTimeSpan(self.firstStartTime, self.firstEndTime)
		nodesDict = InitialData.InitialNodesPairWeightDict(self.firstStartTime, self.firstEndTime)
		nodesDictLabel = InitialData.InitialNodesPairWeightDict(self.secondStartTime, self.secondEndTime)
		# Choose how the first-span graph is partitioned into node groups.
		if partitionMethod == "ConnectedComponents":
			connectedComponents = ConnectedComponents.ReadAllConnectedComponentsFromFile(self.firstStartTime, self.firstEndTime)
		elif partitionMethod == "CommunityDetection":
			connectedComponents = CommunityDetection.ReadCommunitiesFromFile(self.firstStartTime, self.firstEndTime)
		else:
			connectedComponents = LinkClustering.ReadAllConnectedComponentsFromFile(self.firstStartTime, self.firstEndTime)
		# pairDict records which pairs were scored in the partition loop.
		pairDict = {}
		y_true = []
		y_score = []
		vectorsDict = {}
		for nodes in connectedComponents:
			print("Components size = %d"%len(nodes))
			
			# Build this partition's adjacency restricted to its own nodes.
			G = nx.Graph()
			componentNodesDict = {}
			for i in nodes:
				if i not in componentNodesDict:
					componentNodesDict[i] = []
				for j in nodesDict[i]:
					if j in nodes:
						componentNodesDict[i].append(j)
						G.add_edge(i, j)

			# Score each UNLINKED pair (i, j) inside the partition.
			for i in xrange(0, len(nodes)):
				startTime = time.time()
				for j in xrange(i + 1, len(nodes)):
					if nodes[j] not in componentNodesDict[nodes[i]]:
						if nodes[i] not in pairDict:
							pairDict[nodes[i]] = {}
						pairDict[nodes[i]][nodes[j]] = 1
						# startTime = time.time()
						# Common-neighbour count is the ranking score.
						iNeighbors = []
						for neighbor in nodesDict[nodes[i]]:
							iNeighbors.append(neighbor)
						jNeighbors = []
						for neighbor in nodesDict[nodes[j]]:
							jNeighbors.append(neighbor)
						intersect = set(iNeighbors)&set(jNeighbors)
						y_score.append(len(intersect))
						# Label +1 iff the pair is linked in the second span.
						if nodes[i] in nodesDictLabel and nodes[j] in nodesDictLabel[nodes[i]]:
							y_true.append(1)
						else:
							y_true.append(-1)
						# endTime = time.time()
						# print("find paths time:%f"%(endTime - startTime))
						
				endTime = time.time()
				print("nodes[%s] generated, finished in %f s"%(nodes[i], endTime - startTime))
		# Sweep: unlinked pairs inside connected components that were not
		# scored above get a zero score (ordered pairs, both directions).
		components = ConnectedComponents.ReadAllConnectedComponentsFromFile(self.firstStartTime, self.firstEndTime)
		for component in components:
			for i in component:
				for j in component:
					if i != j:
						if i not in pairDict or (i in pairDict and j not in pairDict[i]):
							if i not in nodesDict or (i in nodesDict and j not in nodesDict[i]):
								if i in nodesDictLabel and j in nodesDictLabel[i]:
									y_true.append(1)
								else:
									y_true.append(-1)
								y_score.append(0)
		return y_true, y_score