def run(self):
    """
    Builds the skewed citation/publication sample graph and outputs, for five authors, the
    citation & publication counts followed by NeighborSim, aggregated and ShapeSim score tables.
    """

    # Requested per-author counts; presumably (citations, publications) pairs, matching how the
    # mapping returned by the utility is read below - confirm against SampleGraphUtility.
    requestedCounts = {
        'Alice': (100, 10),
        'Bob': (80, 10),
        'Carol': (100, 100),
        'Dave': (10, 10),
        'Ed': (20, 5)
    }
    self.graph, authorMap, _conference, citationsPublications = \
        SampleGraphUtility.constructSkewedCitationPublicationExample(
            introduceRandomness=False,
            citationsPublicationsParameter=requestedCounts
        )

    # The author nodes reported on, in display order
    authors = [authorMap[label] for label in ('Alice', 'Bob', 'Carol', 'Dave', 'Ed')]

    # Citation & publication totals (the returned mapping is indexed by author node here)
    self.output('\nCitation & Publication Counts')
    countsTable = texttable.Texttable()
    countsTable.add_rows([
        ['Measure'] + [node.name for node in authors],
        ['Citations'] + [citationsPublications[node][0] for node in authors],
        ['Publications'] + [citationsPublications[node][1] for node in authors],
    ])
    self.output(countsTable.draw())

    # Individual strategies: publications (CPA) and citations (CPPA)
    publicationStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, publicationStrategy, 'APCPA PathSim')
    citationStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, citationStrategy, 'APPCPPA PathSim')

    # Weighted aggregates of the two strategies above
    weightPairs = ((0.5, 0.5), (0.6, 0.4), (0.4, 0.6), (0.7, 0.3), (0.3, 0.7))
    for publicationWeight, citationWeight in weightPairs:
        aggregateStrategy = AggregateSimilarityStrategy(
            self.graph,
            [publicationStrategy, citationStrategy],
            [publicationWeight, citationWeight]
        )
        aggregateTitle = 'APCPA-APPCPPAA Pathsim (%1.1f,%1.1f)' % (publicationWeight, citationWeight)
        self.outputSimilarityScores(authorMap, authors, aggregateStrategy, aggregateTitle)

    # ShapeSim via the vector-product strategy
    shapeWeight = 1.0
    shapeStrategy = VectorProductStrategy(
        self.graph,
        weight=shapeWeight,
        omit=[],
        metaPath=[Conference, Paper, Paper, Author],
        symmetric=True
    )
    shapeTitle = 'APPCPPA %s ShapeSim (%1.2f weight)' % ('VectorProduct', shapeWeight)
    self.outputSimilarityScores(authorMap, authors, shapeStrategy, shapeTitle)
def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
    """
    Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

    With the PageRank strategy, the single most similar node returned for Mike must be Ann.
    """

    # The conference map is part of the returned triple but is not needed here
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    strategy = PageRankStrategy(graph)

    mike = authorMap['Mike']
    mostSimilarNodes = strategy.findMostSimilarNodes(mike, 1)

    # assertEqual rather than the deprecated assertEquals alias (removed in Python 3.12)
    self.assertEqual([authorMap['Ann']], mostSimilarNodes)
def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
    """
    Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

    With PathSim over the Author-Paper-Conference-Paper-Author meta path, the nodes most similar
    to Mike must be Bob, Mary and Jim, in that order.
    """

    # The conference map is part of the returned triple but is not needed here
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference, Paper, Author]
    strategy = PathSimStrategy(graph, metaPath)

    mike = authorMap['Mike']
    # 5 is passed as the second argument yet exactly three nodes are expected back;
    # presumably nodes without any similarity to Mike are left out - confirm in PathSimStrategy
    mostSimilarNodes = strategy.findMostSimilarNodes(mike, 5)

    # assertEqual rather than the deprecated assertEquals alias (removed in Python 3.12)
    self.assertEqual([authorMap['Bob'], authorMap['Mary'], authorMap['Jim']], mostSimilarNodes)
def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
    """
    Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

    Asks the PageRank strategy for one most-similar node to Mike and expects it to be Ann.
    """

    # Only the graph and the author map are used; the conference map is ignored
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    strategy = PageRankStrategy(graph)

    mike = authorMap['Mike']
    mostSimilarNodes = strategy.findMostSimilarNodes(mike, 1)

    # The assertEquals alias is deprecated and gone in Python 3.12; assertEqual is the supported name
    self.assertEqual([authorMap['Ann']], mostSimilarNodes)
def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
    """
    Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

    Expects PathSim (Author-Paper-Conference-Paper-Author) to rank Bob, Mary and Jim, in that
    order, as the nodes most similar to Mike.
    """

    # Only the graph and the author map are used; the conference map is ignored
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference, Paper, Author]
    strategy = PathSimStrategy(graph, metaPath)

    mike = authorMap['Mike']
    # NOTE(review): 5 is passed but only three nodes are expected - presumably nodes with no
    # similarity to Mike are omitted; confirm in PathSimStrategy
    mostSimilarNodes = strategy.findMostSimilarNodes(mike, 5)

    # The assertEquals alias is deprecated and gone in Python 3.12; assertEqual is the supported name
    expectedNodes = [authorMap['Bob'], authorMap['Mary'], authorMap['Jim']]
    self.assertEqual(expectedNodes, mostSimilarNodes)
def testFindSingleSimilarityPathSimExampleThree(self):
    """
    Tests pairwise similarity for nodes, using example 3 from PathSim paper (compute similarity
    scores from Mike).

    Under PathSim, Bob must have the highest score of the four other authors and Ann's score
    must be zero.
    """

    # The conference map is part of the returned triple but is not needed here
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference, Paper, Author]
    strategy = PathSimStrategy(graph, metaPath)

    mike = authorMap['Mike']
    # Scores are unpacked in the same order as the nodes are passed in
    jimScore, maryScore, bobScore, annScore = strategy.findSimilarityScores(
        mike, [authorMap['Jim'], authorMap['Mary'], authorMap['Bob'], authorMap['Ann']]
    )

    # assertEqual rather than the deprecated assertEquals alias (removed in Python 3.12)
    self.assertEqual(bobScore, max([jimScore, maryScore, bobScore, annScore]))
    self.assertEqual(annScore, 0)
def testFindSingleSimilarityPathSimExampleThree(self):
    """
    Tests pairwise similarity for nodes, using example 3 from PathSim paper (compute similarity
    scores from Mike).

    Under the PageRank strategy, Ann's score must be at least as high as Mary's, Jim's and Bob's.
    """

    # The conference map is part of the returned triple but is not needed here
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    strategy = PageRankStrategy(graph)

    mike = authorMap['Mike']
    jimScore = strategy.findSimilarityScore(mike, authorMap['Jim'])
    maryScore = strategy.findSimilarityScore(mike, authorMap['Mary'])
    bobScore = strategy.findSimilarityScore(mike, authorMap['Bob'])
    annScore = strategy.findSimilarityScore(mike, authorMap['Ann'])

    # assertGreaterEqual reports both operands on failure, unlike assertTrue(a >= b)
    self.assertGreaterEqual(annScore, maryScore)
    self.assertGreaterEqual(annScore, jimScore)
    self.assertGreaterEqual(annScore, bobScore)
def testFindSingleSimilarityPathSimExampleThree(self):
    """
    Tests pairwise similarity for nodes, using example 3 from PathSim paper (compute similarity
    scores from Mike).

    Checks that the PageRank strategy scores Ann no lower than Mary, Jim or Bob.
    """

    # Only the graph and the author map are used; the conference map is ignored
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    strategy = PageRankStrategy(graph)

    mike = authorMap['Mike']
    jimScore = strategy.findSimilarityScore(mike, authorMap['Jim'])
    maryScore = strategy.findSimilarityScore(mike, authorMap['Mary'])
    bobScore = strategy.findSimilarityScore(mike, authorMap['Bob'])
    annScore = strategy.findSimilarityScore(mike, authorMap['Ann'])

    # Dedicated comparison assertions show the two scores when they fail; assertTrue would not
    self.assertGreaterEqual(annScore, maryScore)
    self.assertGreaterEqual(annScore, jimScore)
    self.assertGreaterEqual(annScore, bobScore)
def testFindSingleSimilarityPathSimExampleThree(self):
    """
    Tests pairwise similarity for nodes, using example 3 from PathSim paper (compute similarity
    scores from Mike).

    Expects Bob to hold the maximum PathSim score among Jim, Mary, Bob and Ann, and Ann's score
    to equal zero.
    """

    # Only the graph and the author map are used; the conference map is ignored
    graph, authorMap, _conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference, Paper, Author]
    strategy = PathSimStrategy(graph, metaPath)

    mike = authorMap['Mike']
    otherAuthors = [authorMap['Jim'], authorMap['Mary'], authorMap['Bob'], authorMap['Ann']]
    # Scores are unpacked in the same order as otherAuthors
    jimScore, maryScore, bobScore, annScore = strategy.findSimilarityScores(mike, otherAuthors)

    # The assertEquals alias is deprecated and gone in Python 3.12; assertEqual is the supported name
    self.assertEqual(bobScore, max([jimScore, maryScore, bobScore, annScore]))
    self.assertEqual(annScore, 0)
def testConstructPathSimExampleThree(self):
    """
    Tests the construction of "Example 3" from PathSim paper. Specifically, checks adjacency
    matrix shown in this example for Author-Paper-Conference meta paths.

    For every (author, conference) pair, the number of meta paths found between the two nodes
    must equal the corresponding entry of the expected matrix below.
    """

    graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference]
    metaPathUtility = EdgeBasedMetaPathUtility()

    # Expected adjacency matrix: one row per author, one column per conference (in this order)
    conferenceNames = ['SIGMOD', 'VLDB', 'ICDE', 'KDD']
    expectedAdjacency = [
        ('Mike', [2, 1, 0, 0]),
        ('Jim', [50, 20, 0, 0]),
        ('Mary', [2, 0, 1, 0]),
        ('Bob', [2, 1, 0, 0]),
        ('Ann', [0, 0, 1, 1]),
    ]

    for authorName, expectedCounts in expectedAdjacency:
        for conferenceName, expectedCount in zip(conferenceNames, expectedCounts):
            foundPaths = metaPathUtility.findMetaPaths(
                graph, authorMap[authorName], conferenceMap[conferenceName], metaPath
            )
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12);
            # the message pinpoints which matrix cell is wrong
            self.assertEqual(
                expectedCount,
                len(foundPaths),
                'Unexpected meta path count for %s -> %s' % (authorName, conferenceName)
            )
def run(self):
    """
    Runs the skewed citation/publication experiment: constructs the sample graph without
    randomness, outputs the per-author citation & publication counts, then outputs similarity
    scores for two NeighborSim strategies, five weighted aggregates of them, and one
    vector-product ShapeSim strategy.
    """

    authorNames = ['Alice', 'Bob', 'Carol', 'Dave', 'Ed']

    # Counts handed to the graph builder; the mapping it returns is what gets displayed below.
    # Presumably each tuple is (citations, publications) - confirm against SampleGraphUtility.
    inputCounts = dict(zip(authorNames, [(100, 10), (80, 10), (100, 100), (10, 10), (20, 5)]))
    self.graph, authorMap, _conference, citationsPublications = \
        SampleGraphUtility.constructSkewedCitationPublicationExample(
            introduceRandomness=False,
            citationsPublicationsParameter=inputCounts
        )

    # Get the nodes we care about
    authors = [authorMap[authorName] for authorName in authorNames]

    # Total citation & publication counts
    self.output('\nCitation & Publication Counts')
    summaryTable = texttable.Texttable()
    headerRow = ['Measure'] + [author.name for author in authors]
    citationRow = ['Citations'] + [citationsPublications[author][0] for author in authors]
    publicationRow = ['Publications'] + [citationsPublications[author][1] for author in authors]
    summaryTable.add_rows([headerRow, citationRow, publicationRow])
    self.output(summaryTable.draw())

    # Output the publication-based and citation-based similarity scores
    pubsStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, pubsStrategy, 'APCPA PathSim')
    citsStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, citsStrategy, 'APPCPPA PathSim')

    # Output the aggregate of both strategies under several weightings
    for weights in ([0.5, 0.5], [0.6, 0.4], [0.4, 0.6], [0.7, 0.3], [0.3, 0.7]):
        combinedStrategy = AggregateSimilarityStrategy(
            self.graph, [pubsStrategy, citsStrategy], weights
        )
        self.outputSimilarityScores(
            authorMap, authors, combinedStrategy,
            'APCPA-APPCPPAA Pathsim (%1.1f,%1.1f)' % tuple(weights)
        )

    # Output ShapeSim strategy
    weight = 1.0
    shapeSimStrategy = VectorProductStrategy(
        self.graph,
        weight=weight,
        omit=[],
        metaPath=[Conference, Paper, Paper, Author],
        symmetric=True
    )
    title = 'APPCPPA %s ShapeSim (%1.2f weight)' % ('VectorProduct', weight)
    self.outputSimilarityScores(authorMap, authors, shapeSimStrategy, title)
def run(self):
    """
    Runs the extended PathSim "example three" experiment: builds the sample graph with extra
    authors and citations, outputs the projected adjacency, citation and citation-total tables,
    and then outputs the similarity scores of several strategies for every author.
    """

    # Citation counts as a square matrix: row i becomes the inner dict stored under
    # authorNames[i], with its columns keyed by authorNames as well.
    # NOTE(review): which side cites which is decided by SampleGraphUtility - confirm there.
    authorNames = ('Mike', 'Jim', 'Mary', 'Bob', 'Ann', 'Joe', 'Nancy')
    citationMatrix = (
        (0, 0, 0, 0, 0, 0, 0),
        (20, 0, 20, 20, 0, 20, 0),
        (1, 10, 0, 1, 0, 1, 0),
        (1, 10, 1, 0, 0, 1, 0),
        (0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0),
        (1, 10, 1, 1, 0, 1, 0),
    )
    citationMap = dict(
        (rowName, dict(zip(authorNames, rowCounts)))
        for rowName, rowCounts in zip(authorNames, citationMatrix)
    )

    self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree(
        extraAuthorsAndCitations=True, citationMap=citationMap
    )

    # Nodes reported on, in display order
    conferences = [conferenceMap[label] for label in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
    authors = [authorMap[label] for label in authorNames]

    metaPathUtility = EdgeBasedMetaPathUtility()

    # Author/conference table from the symmetric Author-Paper-Conference projection
    publicationGraph = metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )
    self.output('\nAdjacency Matrix (Projected):')
    publicationTable = texttable.Texttable()
    publicationRows = [['Author'] + [conference.name for conference in conferences]]
    for author in authors:
        edgeCounts = [publicationGraph.getNumberOfEdges(author, conference) for conference in conferences]
        publicationRows.append([author.name] + edgeCounts)
    publicationTable.add_rows(publicationRows)
    self.output(publicationTable.draw())

    # Author/author table from the Author-Paper-Paper-Author projection
    citationGraph = metaPathUtility.createHomogeneousProjection(
        self.graph, [Author, Paper, Paper, Author]
    )
    self.output('\nCitation Matrix:')
    citationTable = texttable.Texttable()
    citationRows = [['Author'] + [author.name for author in authors]]
    for author in authors:
        edgeCounts = [citationGraph.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors]
        citationRows.append([author.name] + edgeCounts)
    citationTable.add_rows(citationRows)
    self.output(citationTable.draw())

    # Per-author totals: 'In' sums edges pointing at the author, 'Out' sums edges leaving it
    self.output('\nCitations Total:')
    totalsTable = texttable.Texttable()
    totalsRows = [['Author', 'In', 'Out']]
    for author in authors:
        incoming = sum(citationGraph.getNumberOfEdges(otherAuthor, author) for otherAuthor in authors)
        outgoing = sum(citationGraph.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors)
        totalsRows.append([author.name, incoming, outgoing])
    totalsTable.add_rows(totalsRows)
    self.output(totalsTable.draw())

    # PathSim scores
    pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSimStrategy, 'APCPA PathSim')

    # SimRank scores
    plainSimRankStrategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, plainSimRankStrategy, "SimRank")

    # PageRank / HITS distance scores
    distanceStrategies = (('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy))
    for label, strategyClass in distanceStrategies:
        distanceStrategy = strategyClass(self.graph, [Author, Paper, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, distanceStrategy, "%s-Distance" % label)

    # NeighborSim scores in both directions
    inNeighborSimStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, inNeighborSimStrategy, 'APPA NeighborSim-In')
    outNeighborSimStrategy = NeighborSimStrategy(
        self.graph, [Author, Paper, Paper, Author], reversed=True, smoothed=True
    )
    self.outputSimilarityScores(authorMap, authors, outNeighborSimStrategy, 'APPA NeighborSim-Out')

    # Equal-weight aggregate of PathSim and a freshly constructed SimRank strategy
    simRankStrategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author], symmetric=True)
    pathSimAndSimRank = AggregateSimilarityStrategy(
        self.graph, [pathSimStrategy, simRankStrategy], [0.5, 0.5]
    )
    self.outputSimilarityScores(authorMap, authors, pathSimAndSimRank, 'APCPA Pathsim, APPA SimRank')

    # Aggregate of PathSim with both NeighborSim directions
    pathSimAndNeighborSim = AggregateSimilarityStrategy(
        self.graph,
        [pathSimStrategy, inNeighborSimStrategy, outNeighborSimStrategy],
        [0.6, 0.2, 0.2]
    )
    self.outputSimilarityScores(
        authorMap, authors, pathSimAndNeighborSim, 'APCPA Pathsim, APPA NeighborSim-Combined'
    )
def run(self):
    """
    Runs the skewed citation/publication experiment with the default sample graph (no
    randomness): outputs the CPA and CPPA adjacency rows for the conference, the per-author
    citation & publication counts, and the scores of the NeighborSim, ShapeSim and recursive
    PathSim strategies.
    """

    self.graph, authorMap, conference, citationsPublications = \
        SampleGraphUtility.constructSkewedCitationPublicationExample(introduceRandomness=False)

    # Nodes reported on, in display order
    authors = [authorMap[label] for label in ('Alice', 'Bob', 'Carol', 'Dave', 'Ed', 'Frank')]

    metaPathUtility = EdgeBasedMetaPathUtility()

    # One single-row table per meta path: the conference's row of the projected adjacency matrix
    adjacencyReports = (
        ('\nCPA Adjacency Matrix:', [Conference, Paper, Author]),
        ('\nCPPA Adjacency Matrix:', [Conference, Paper, Paper, Author]),
    )
    for heading, adjacencyMetaPath in adjacencyReports:
        self.output(heading)
        matrix, nodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
            self.graph, adjacencyMetaPath, project=True
        )
        matrixTable = texttable.Texttable()
        conferenceRow = matrix[nodesIndex[conference]]
        matrixTable.add_rows([
            ['Conference'] + [author.name for author in authors],
            [conference.name] + [conferenceRow[nodesIndex[author]] for author in authors],
        ])
        self.output(matrixTable.draw())

    # Citation & publication totals (the returned mapping is indexed by author node here)
    self.output('\nCitation & Publication Counts')
    countsTable = texttable.Texttable()
    countsTable.add_rows([
        ['Measure'] + [author.name for author in authors],
        ['Citations'] + [citationsPublications[author][0] for author in authors],
        ['Publications'] + [citationsPublications[author][1] for author in authors],
    ])
    self.output(countsTable.draw())

    # NeighborSim over publications (CPA) and over citations (CPPA)
    publicationStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, publicationStrategy, 'APCPA PathSim')
    citationStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, citationStrategy, 'APPCPPA PathSim')

    # ShapeSim for each strategy class at each weight, nothing omitted from the path
    shapeStrategies = ((FlattenedMatrixStrategy, 'FlatMat'), (VectorProductStrategy, 'VectorProduct'))
    for strategyClass, strategyLabel in shapeStrategies:
        for weight in (1.0, 0.5, 0):
            shapeStrategy = strategyClass(
                self.graph,
                weight=weight,
                omit=[],
                metaPath=[Conference, Paper, Paper, Author],
                symmetric=True
            )
            shapeTitle = 'APPCPPA %s ShapeSim (%1.2f weight)' % (strategyLabel, weight)
            self.outputSimilarityScores(authorMap, authors, shapeStrategy, shapeTitle)

    # Vector-product ShapeSim again, this time passing omit=[0]
    omitWeight = 1.0
    omittingStrategy = VectorProductStrategy(
        self.graph,
        weight=omitWeight,
        omit=[0],
        metaPath=[Conference, Paper, Paper, Author],
        symmetric=True
    )
    omittingTitle = 'APPCPPA VectorProduct ShapeSim omitting CPC (%1.2f weight)' % omitWeight
    self.outputSimilarityScores(authorMap, authors, omittingStrategy, omittingTitle)

    # Recursive PathSim
    recursiveStrategy = RecursivePathSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, recursiveStrategy, 'APPCPPA Recursive PathSim')
def run(self):
    """
    Runs the multi-disciplinary author experiment: builds the sample graph, outputs the
    author/author citation table, the author/conference adjacency table and the total citation
    counts, then outputs the scores of the NeighborSim, PathSim, SimRank, aggregated and
    PageRank/HITS distance strategies.
    """

    self.graph, authorMap, conferenceMap, totalCitationCount = \
        SampleGraphUtility.constructMultiDisciplinaryAuthorExample()

    # Nodes reported on, in display order
    conferences = [conferenceMap[label] for label in ('VLDB', 'KDD')]
    authors = [authorMap[label] for label in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I')]

    self.metaPathUtility = EdgeBasedMetaPathUtility()

    # Edge counts between every ordered pair of nodes in the Author-Paper-Paper-Author projection
    citationGraph = self.metaPathUtility.createHomogeneousProjection(
        self.graph, [Author, Paper, Paper, Author]
    )
    authorCitationCounts = {
        author: {
            otherAuthor: citationGraph.getNumberOfEdges(author, otherAuthor)
            for otherAuthor in citationGraph.getNodes()
        }
        for author in citationGraph.getNodes()
    }

    # Author/author citation table
    self.output('\nCitation Matrix:')
    citationTable = texttable.Texttable()
    citationRows = [['Author'] + [author.name for author in authors]]
    for author in authors:
        countsForAuthor = [authorCitationCounts[author][otherAuthor] for otherAuthor in authors]
        citationRows.append([author.name] + countsForAuthor)
    citationTable.add_rows(citationRows)
    self.output(citationTable.draw())

    # Author/conference adjacency table
    self.output('\nAdjacency Matrix:')
    adjacencyTable = texttable.Texttable()
    publicationGraph = self.metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference]
    )
    adjacencyRows = [[''] + [conference.name for conference in conferences]]
    for author in authors:
        edgeCounts = [publicationGraph.getNumberOfEdges(author, conference) for conference in conferences]
        adjacencyRows.append([author.name] + edgeCounts)
    adjacencyTable.add_rows(adjacencyRows)
    self.output(adjacencyTable.draw())

    # Total citation counts (this mapping is indexed by author name, not by node)
    self.output('\nTotal Citation Counts:')
    nameRow = [author.name for author in authors]
    totalRow = ['%d' % totalCitationCount[author.name] for author in authors]
    totalsTable = texttable.Texttable()
    totalsTable.add_rows([nameRow, totalRow])
    self.output(totalsTable.draw())

    # NeighborSim scores
    neighborSimStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, "NeighborSim")

    # PathSim scores
    pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSimStrategy, "PathSim")

    # SimRank scores
    simRankStrategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, simRankStrategy, "SimRank")

    # Equal-weight aggregate of SimRank and PathSim
    aggregateStrategy = AggregateSimilarityStrategy(
        self.graph, [simRankStrategy, pathSimStrategy], [0.5, 0.5]
    )
    self.outputSimilarityScores(authorMap, authors, aggregateStrategy, 'APCPA Pathsim, APPA SimRank')

    # PageRank / HITS distance scores; the two author groups are passed as nodeSets
    # (authors D, E and I appear in both groups)
    distanceStrategies = (('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy))
    for label, strategyClass in distanceStrategies:
        researchAreas = {
            (authorMap['A'], authorMap['B'], authorMap['C'], authorMap['D'], authorMap['E'], authorMap['I']),
            (authorMap['F'], authorMap['G'], authorMap['H'], authorMap['D'], authorMap['E'], authorMap['I']),
        }
        distanceStrategy = strategyClass(
            self.graph, [Author, Paper, Paper, Author], nodeSets=researchAreas, symmetric=True
        )
        self.outputSimilarityScores(authorMap, authors, distanceStrategy, "%s-Distance" % label)
def run(self):
    """
    Runs the PathSim "example three" experiment with extra authors and citations. Outputs the
    projected author/conference table, the author/author citation table and per-author citation
    totals, followed by PathSim, SimRank, PageRank/HITS distance, NeighborSim and aggregated
    similarity scores.
    """

    # Nested citation counts keyed by author name on both levels.
    # NOTE(review): the direction (outer key citing inner key, or the reverse) is defined by
    # SampleGraphUtility - confirm there.
    citationMap = {
        'Mike': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Jim': {'Mike': 20, 'Jim': 0, 'Mary': 20, 'Bob': 20, 'Ann': 0, 'Joe': 20, 'Nancy': 0},
        'Mary': {'Mike': 1, 'Jim': 10, 'Mary': 0, 'Bob': 1, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
        'Bob': {'Mike': 1, 'Jim': 10, 'Mary': 1, 'Bob': 0, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
        'Ann': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Joe': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Nancy': {'Mike': 1, 'Jim': 10, 'Mary': 1, 'Bob': 1, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
    }
    self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree(
        extraAuthorsAndCitations=True,
        citationMap=citationMap
    )

    # Get the nodes we care about
    conferenceNames = ['SIGMOD', 'VLDB', 'ICDE', 'KDD']
    conferences = [conferenceMap[conferenceName] for conferenceName in conferenceNames]
    authorNames = ['Mike', 'Jim', 'Mary', 'Bob', 'Ann', 'Joe', 'Nancy']
    authors = [authorMap[authorName] for authorName in authorNames]

    def renderTable(tableRows):
        # Draws the given rows as a text table and returns the rendered string
        table = texttable.Texttable()
        table.add_rows(tableRows)
        return table.draw()

    metaPathUtility = EdgeBasedMetaPathUtility()
    citationMetaPath = [Author, Paper, Paper, Author]

    # Project a 2-typed heterogeneous graph over adapted PathSim example
    publicationProjection = metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )
    self.output('\nAdjacency Matrix (Projected):')
    rows = [['Author'] + [conference.name for conference in conferences]]
    rows.extend(
        [author.name] + [
            publicationProjection.getNumberOfEdges(author, conference) for conference in conferences
        ]
        for author in authors
    )
    self.output(renderTable(rows))

    # Project a homogeneous citation graph over adapted PathSim example
    citationProjection = metaPathUtility.createHomogeneousProjection(self.graph, list(citationMetaPath))
    self.output('\nCitation Matrix:')
    rows = [['Author'] + [author.name for author in authors]]
    rows.extend(
        [author.name] + [
            citationProjection.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors
        ]
        for author in authors
    )
    self.output(renderTable(rows))

    # Output total in/out citations per author
    self.output('\nCitations Total:')
    rows = [['Author', 'In', 'Out']]
    for author in authors:
        inTotal = 0
        for otherAuthor in authors:
            inTotal += citationProjection.getNumberOfEdges(otherAuthor, author)
        outTotal = 0
        for otherAuthor in authors:
            outTotal += citationProjection.getNumberOfEdges(author, otherAuthor)
        rows.append([author.name, inTotal, outTotal])
    self.output(renderTable(rows))

    # Get PathSim similarity scores
    pathSim = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSim, 'APCPA PathSim')

    # Output SimRank-related scores
    simRankAlone = SimRankStrategy(self.graph, list(citationMetaPath), symmetric=True)
    self.outputSimilarityScores(authorMap, authors, simRankAlone, "SimRank")

    # Output the projected PageRank/HITS similarity scores
    for algorithmName, algorithm in [('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy)]:
        distance = algorithm(self.graph, list(citationMetaPath), symmetric=True)
        self.outputSimilarityScores(authorMap, authors, distance, "%s-Distance" % algorithmName)

    # Get NeighborSim similarity scores
    neighborSimIn = NeighborSimStrategy(self.graph, list(citationMetaPath))
    self.outputSimilarityScores(authorMap, authors, neighborSimIn, 'APPA NeighborSim-In')
    neighborSimOut = NeighborSimStrategy(
        self.graph, list(citationMetaPath), reversed=True, smoothed=True
    )
    self.outputSimilarityScores(authorMap, authors, neighborSimOut, 'APPA NeighborSim-Out')

    # PathSim combined with a second, separately constructed SimRank strategy
    simRankForAggregate = SimRankStrategy(self.graph, list(citationMetaPath), symmetric=True)
    pathSimSimRank = AggregateSimilarityStrategy(
        self.graph, [pathSim, simRankForAggregate], [0.5, 0.5]
    )
    self.outputSimilarityScores(authorMap, authors, pathSimSimRank, 'APCPA Pathsim, APPA SimRank')

    # PathSim combined with both NeighborSim variants
    pathSimNeighborSim = AggregateSimilarityStrategy(
        self.graph, [pathSim, neighborSimIn, neighborSimOut], [0.6, 0.2, 0.2]
    )
    self.outputSimilarityScores(
        authorMap, authors, pathSimNeighborSim, 'APCPA Pathsim, APPA NeighborSim-Combined'
    )
def run(self):
    """
    Runs the multi-disciplinary author experiment with the indirect author enabled: builds the
    sample graph, outputs the author/author citation table, the author/conference adjacency
    table and the total citation counts, then outputs the scores of the PathSim, NeighborSim,
    propagated NeighborSim and citation-count-difference strategies.
    """

    self.graph, authorMap, conferenceMap, totalCitationCount = \
        SampleGraphUtility.constructMultiDisciplinaryAuthorExample(indirectAuthor=True)

    # Get the nodes we care about
    conferences = [conferenceMap[label] for label in ('VLDB', 'KDD')]
    authors = [authorMap[label] for label in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J')]

    self.metaPathUtility = EdgeBasedMetaPathUtility()

    # Build homogeneous projection of network (authors, with edges for times authors cite each other)
    citationGraph = self.metaPathUtility.createHomogeneousProjection(
        self.graph, [Author, Paper, Paper, Author]
    )
    authorCitationCounts = {}
    for author in citationGraph.getNodes():
        countsForAuthor = {}
        authorCitationCounts[author] = countsForAuthor
        for otherAuthor in citationGraph.getNodes():
            countsForAuthor[otherAuthor] = citationGraph.getNumberOfEdges(author, otherAuthor)

    # Output the adjacency matrix for authors-authors in the graph
    self.output('\nCitation Matrix:')
    citationTable = texttable.Texttable()
    rows = [['Author'] + [author.name for author in authors]]
    rows += [
        [author.name] + [authorCitationCounts[author][otherAuthor] for otherAuthor in authors]
        for author in authors
    ]
    citationTable.add_rows(rows)
    self.output(citationTable.draw())

    # Output the adjacency matrix for authors & conferences in the graph
    self.output('\nAdjacency Matrix:')
    adjacencyTable = texttable.Texttable()
    publicationGraph = self.metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference]
    )
    rows = [[''] + [conference.name for conference in conferences]]
    rows += [
        [author.name] + [publicationGraph.getNumberOfEdges(author, conference) for conference in conferences]
        for author in authors
    ]
    adjacencyTable.add_rows(rows)
    self.output(adjacencyTable.draw())

    # Output total citation counts (this mapping is indexed by author name, not by node)
    self.output('\nTotal Citation Counts:')
    rows = [
        [author.name for author in authors],
        ['%d' % totalCitationCount[author.name] for author in authors]
    ]
    citationCountTable = texttable.Texttable()
    citationCountTable.add_rows(rows)
    self.output(citationCountTable.draw())

    # Output the PathSim similarity scores
    pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSimStrategy, "PathSim")

    # Output the NeighborSim similarity scores for both meta paths
    neighborSimMetaPaths = (
        ([Conference, Paper, Paper, Author], "NeighborSim (CPPA)"),
        ([Author, Paper, Paper, Author], "NeighborSim (APPA)"),
    )
    for neighborSimMetaPath, neighborSimTitle in neighborSimMetaPaths:
        neighborSimStrategy = NeighborSimStrategy(self.graph, neighborSimMetaPath)
        self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, neighborSimTitle)

    # Constant weight propagation first, then preferential attachment propagation; each is run
    # for the same iteration counts. The loop replaces eight copy-pasted stanzas and keeps their
    # original order and titles.
    propagationStrategies = (
        (NeighborSimConstantPropagationStrategy, "NeighborSim-ConstantPropagation-%d"),
        (NeighborSimConstantPreferentialAttachmentStrategy, "NeighborSim-WeightedPropagation-%d"),
    )
    for strategyClass, titleTemplate in propagationStrategies:
        for iterations in (2, 3, 4, 50):
            propagatedStrategy = strategyClass(
                self.graph, [Author, Paper, Paper, Author], iterations=iterations
            )
            self.outputSimilarityScores(
                authorMap, authors, propagatedStrategy, titleTemplate % iterations
            )

    # Neighbor citation count difference strategy
    citeCountStrategy = NeighborSimStrategy(
        self.graph, [Paper, Paper, Author], commonNeighbors=False
    )
    self.outputSimilarityScores(
        authorMap, authors, citeCountStrategy, "NeighborSim-CiteCountDiff",
        citationCounts=totalCitationCount
    )
def run(self):
    """
    Output adjacency matrices and similarity scores for the graph built by
    SampleGraphUtility.constructPathSimExampleThree.

    Writes the APC, CPA and computed APCPA adjacency tables through self.output, then reports
    SimRank (on the full graph, on the APC projection, and on the projection with a
    squared-neighbor normalization), NeighborSim and PathSim scores through
    self.outputSimilarityScores.
    """

    self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()

    # Nodes of interest, listed in the order they appear in every table
    conferences = [conferenceMap[name] for name in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
    authors = [authorMap[name] for name in ('Mike', 'Jim', 'Mary', 'Bob', 'Ann')]

    utility = EdgeBasedMetaPathUtility()

    def outputMatrixTable(cornerLabel, rowNodes, columnNodes, matrix, index):
        # Draw one labelled table holding matrix[row][column] for the selected nodes
        table = texttable.Texttable()
        tableRows = [[cornerLabel] + [node.name for node in columnNodes]]
        for rowNode in rowNodes:
            labelledRow = [rowNode.name]
            labelledRow.extend(
                matrix[index[rowNode]][index[columnNode]] for columnNode in columnNodes
            )
            tableRows.append(labelledRow)
        table.add_rows(tableRows)
        self.output(table.draw())

    self.output('\nAPC Adjacency Matrix:')
    authorToConference, nodesIndex = utility.getAdjacencyMatrixFromGraph(
        self.graph, [Author, Paper, Conference], project=True
    )
    outputMatrixTable('Author', authors, conferences, authorToConference, nodesIndex)

    self.output('\nCPA Adjacency Matrix:')
    # The index returned by this call is not used; lookups keep using the APC call's index
    conferenceToAuthor, unusedIndex = utility.getAdjacencyMatrixFromGraph(
        self.graph, [Conference, Paper, Author], project=True
    )
    outputMatrixTable('Conference', conferences, authors, conferenceToAuthor, nodesIndex)

    self.output('\nAPCPA Adjacency Matrix (Computed):')
    authorToAuthor = numpy.dot(authorToConference, conferenceToAuthor)
    outputMatrixTable('Author', authors, authors, authorToAuthor, nodesIndex)

    # Homogeneous SimRank, computed on the original graph
    simRankOnFullGraph = SimRankStrategy(self.graph)
    self.outputSimilarityScores(authorMap, authors, simRankOnFullGraph, 'Homogeneous SimRank')

    apcProjection = utility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )

    # Heterogeneous SimRank, computed on the APC projection
    simRankOnProjection = SimRankStrategy(apcProjection)
    self.outputSimilarityScores(authorMap, authors, simRankOnProjection, 'APC Heterogeneous SimRank')

    # Heterogeneous SimRank normalized by the squared predecessor counts of both nodes
    def sqNeighborsNorm(graph, a, b, sim):
        predecessorCountA = len(graph.getPredecessors(a))
        predecessorCountB = len(graph.getPredecessors(b))
        return float(predecessorCountA ** 2 * predecessorCountB ** 2)

    squaredSimRankOnProjection = SimRankStrategy(apcProjection, normalization=sqNeighborsNorm)
    self.outputSimilarityScores(
        authorMap, authors, squaredSimRankOnProjection, 'Squared Heterogeneous SimRank'
    )

    # NeighborSim over the APC meta path
    apcNeighborSim = NeighborSimStrategy(self.graph, [Author, Paper, Conference], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, apcNeighborSim, 'APC NeighborSim')

    # PathSim over the APCPA meta path
    apcpaPathSim = PathSimStrategy(
        self.graph, [Author, Paper, Conference, Paper, Author], symmetric=True
    )
    self.outputSimilarityScores(authorMap, authors, apcpaPathSim, 'APCPA PathSim')
def run(self):
    """
    Output adjacency matrices, citation/publication counts and similarity scores for the graph
    built by SampleGraphUtility.constructSkewedCitationPublicationExample (randomness disabled).

    Tables are written through self.output; every similarity strategy is reported through
    self.outputSimilarityScores.
    """

    exampleData = SampleGraphUtility.constructSkewedCitationPublicationExample(
        introduceRandomness=False
    )
    self.graph, authorMap, conference, citationsPublications = exampleData

    # Author nodes of interest, in display order
    authors = [authorMap[name] for name in ("Alice", "Bob", "Carol", "Dave", "Ed", "Frank")]

    utility = EdgeBasedMetaPathUtility()

    # Output adjacency matrices: one single-row table (the conference) per meta path
    adjacencyTables = (
        ("\nCPA Adjacency Matrix:", (Conference, Paper, Author)),
        ("\nCPPA Adjacency Matrix:", (Conference, Paper, Paper, Author)),
    )
    for heading, pathTypes in adjacencyTables:
        self.output(heading)
        matrix, nodesIndex = utility.getAdjacencyMatrixFromGraph(
            self.graph, list(pathTypes), project=True
        )
        matrixTable = texttable.Texttable()
        headerRow = ["Conference"] + [author.name for author in authors]
        conferenceRow = [conference.name]
        conferenceRow.extend(
            matrix[nodesIndex[conference]][nodesIndex[author]] for author in authors
        )
        matrixTable.add_rows([headerRow, conferenceRow])
        self.output(matrixTable.draw())

    # Total citation & publication counts
    self.output("\nCitation & Publication Counts")
    countsTable = texttable.Texttable()
    measureHeader = ["Measure"] + [author.name for author in authors]
    citationRow = ["Citations"] + [citationsPublications[author][0] for author in authors]
    publicationRow = ["Publications"] + [citationsPublications[author][1] for author in authors]
    countsTable.add_rows([measureHeader, citationRow, publicationRow])
    self.output(countsTable.draw())

    # Output NeighborSim & PathSim similarity scores
    publicationStrategy = NeighborSimStrategy(
        self.graph, [Conference, Paper, Author], symmetric=True
    )
    self.outputSimilarityScores(authorMap, authors, publicationStrategy, "APCPA PathSim")
    citationStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, citationStrategy, "APPCPPA PathSim")

    # ShapeSim with nothing omitted from the path, at several weights, for each strategy class
    shapeStrategies = [
        (FlattenedMatrixStrategy, "FlatMat"),
        (VectorProductStrategy, "VectorProduct"),
    ]
    for strategyClass, strategyLabel in shapeStrategies:
        for weight in [1.0, 0.5, 0]:
            shapeStrategy = strategyClass(
                self.graph,
                weight=weight,
                omit=[],
                metaPath=[Conference, Paper, Paper, Author],
                symmetric=True
            )
            shapeTitle = "APPCPPA %s ShapeSim (%1.2f weight)" % (strategyLabel, weight)
            self.outputSimilarityScores(authorMap, authors, shapeStrategy, shapeTitle)

    # VectorProduct ShapeSim again, this time with omit=[0]
    omitWeight = 1.0
    omittingStrategy = VectorProductStrategy(
        self.graph,
        weight=omitWeight,
        omit=[0],
        metaPath=[Conference, Paper, Paper, Author],
        symmetric=True
    )
    omittingTitle = "APPCPPA VectorProduct ShapeSim omitting CPC (%1.2f weight)" % omitWeight
    self.outputSimilarityScores(authorMap, authors, omittingStrategy, omittingTitle)

    # Output recursive pathsim strategy score(s)
    recursiveStrategy = RecursivePathSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, recursiveStrategy, "APPCPPA Recursive PathSim")
def run(self):
    """
    Report adjacency matrices and similarity scores for the graph returned by
    SampleGraphUtility.constructPathSimExampleThree.

    The APC, CPA and computed APCPA adjacency tables are written with self.output; SimRank,
    NeighborSim and PathSim scores are reported with self.outputSimilarityScores.
    """

    self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()

    # Get the nodes we care about
    conferenceNames = ['SIGMOD', 'VLDB', 'ICDE', 'KDD']
    authorNames = ['Mike', 'Jim', 'Mary', 'Bob', 'Ann']
    conferences = [conferenceMap[name] for name in conferenceNames]
    authors = [authorMap[name] for name in authorNames]

    metaPathUtility = EdgeBasedMetaPathUtility()

    # Authors as rows, conferences as columns
    self.output('\nAPC Adjacency Matrix:')
    apcMatrix, nodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
        self.graph, [Author, Paper, Conference], project=True
    )
    apcTable = texttable.Texttable()
    apcRows = [['Author'] + [conference.name for conference in conferences]]
    for author in authors:
        row = [author.name]
        for conference in conferences:
            row.append(apcMatrix[nodesIndex[author]][nodesIndex[conference]])
        apcRows.append(row)
    apcTable.add_rows(apcRows)
    self.output(apcTable.draw())

    # Conferences as rows, authors as columns; the index from this call is not used
    self.output('\nCPA Adjacency Matrix:')
    cpaMatrix, ignoredIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
        self.graph, [Conference, Paper, Author], project=True
    )
    cpaTable = texttable.Texttable()
    cpaRows = [['Conference'] + [author.name for author in authors]]
    for conference in conferences:
        row = [conference.name]
        for author in authors:
            row.append(cpaMatrix[nodesIndex[conference]][nodesIndex[author]])
        cpaRows.append(row)
    cpaTable.add_rows(cpaRows)
    self.output(cpaTable.draw())

    # Author-to-author matrix obtained as the product of the two matrices above
    self.output('\nAPCPA Adjacency Matrix (Computed):')
    apcpaMatrix = numpy.dot(apcMatrix, cpaMatrix)
    apcpaTable = texttable.Texttable()
    apcpaRows = [['Author'] + [author.name for author in authors]]
    for author in authors:
        row = [author.name]
        for otherAuthor in authors:
            row.append(apcpaMatrix[nodesIndex[author]][nodesIndex[otherAuthor]])
        apcpaRows.append(row)
    apcpaTable.add_rows(apcpaRows)
    self.output(apcpaTable.draw())

    # Output homogeneous simrank comparison
    plainSimRank = SimRankStrategy(self.graph)
    self.outputSimilarityScores(authorMap, authors, plainSimRank, 'Homogeneous SimRank')

    projection = metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )

    # Output heterogeneous simrank comparison
    projectedSimRank = SimRankStrategy(projection)
    self.outputSimilarityScores(authorMap, authors, projectedSimRank, 'APC Heterogeneous SimRank')

    # Output heterogeneous simrank w/ squared neighbors comparison
    def sqNeighborsNorm(graph, a, b, sim):
        # Product of the squared predecessor counts of both nodes, as a float
        countA = len(graph.getPredecessors(a))
        countB = len(graph.getPredecessors(b))
        return float(countA ** 2 * countB ** 2)

    squaredProjectedSimRank = SimRankStrategy(projection, normalization=sqNeighborsNorm)
    self.outputSimilarityScores(
        authorMap, authors, squaredProjectedSimRank, 'Squared Heterogeneous SimRank'
    )

    # Output NeighborSim similarity scores
    neighborSim = NeighborSimStrategy(self.graph, [Author, Paper, Conference], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, neighborSim, 'APC NeighborSim')

    # Output the PathSim similarity scores
    pathSim = PathSimStrategy(
        self.graph, [Author, Paper, Conference, Paper, Author], symmetric=True
    )
    self.outputSimilarityScores(authorMap, authors, pathSim, 'APCPA PathSim')