def testmethod_4(self):
    dic = PageRank.relative_font_size("youtube")
    result = PageRank.search("youtube")
    if result == [-1]:
        self.assertNotEqual(len(dic), 1, "fail")
    else:
        self.assertNotEqual(len(dic), 0, "fail")
def testmethod_1(self):
    dic = PageRank.hits("zhu")
    result = PageRank.search("zhu")
    if result == [-1]:
        self.assertNotEqual(len(dic), 1, "fail")
    else:
        self.assertNotEqual(len(dic), 0, "fail")
def test_PR():
    IGgraph1 = PageRank.getIGraph(
        'MATCH (n:`andra-user`)<-[r:`andra-from`]-(t1:`andra-tweet`)-[re:`andra-retweet`]->(t2:`andra-tweet`)-[rel:`andra-from`]->(p:`andra-user`) WHERE t2.language="fr" RETURN n as nodeFrom,p as nodeTo',
        grapheNeo, "192.168.1.75:7474", "neo4j", "pass4dbse")
    IGgraph2 = PageRank.getIGraph(
        'MATCH (n:`andra-tweet`)-[re:`andra-retweet`]->(p:`andra-tweet`) RETURN n as nodeFrom,p as nodeTo',
        grapheNeo, "192.168.1.75:7474", "neo4j", "pass4dbse")
    assert type(IGgraph1) == igraph.Graph
    assert len(IGgraph1.vs) != 0
    assert len(IGgraph1.es) != 0
    assert type(IGgraph2) == igraph.Graph
    assert len(IGgraph2.vs) != 0
    assert len(IGgraph2.es) != 0
    PRank1 = PageRank.Rank(IGgraph1, 10)
    PRank2 = PageRank.Rank(IGgraph2, 10)
    assert type(PRank1) == dict
    assert len(PRank1['classement']) == 10
    assert type(PRank1['classement']) == list
    assert type(PRank1['resPR']) == list
    assert len(PRank1['resPR']) != 0
    assert type(PRank2) == dict
    assert len(PRank2['classement']) == 10
    assert type(PRank2['classement']) == list
    assert type(PRank2['resPR']) == list
    assert len(PRank2['resPR']) != 0
    print "PAGERANK OK"
def pesquisar(self):
    self.tela.wm_title('Googlis!')
    self.tela.wm_minsize(width=500, height=250)
    tkinter.Label(self.tela, text=self.texto, font=self.fonte).pack(side='top')
    ##### IMAGE
    img = tkinter.PhotoImage(file='googlis.png')
    logo = tkinter.Button(self.tela, image=img)
    logo.image = img
    logo.place(x=190, y=30)
    ##### BUTTONS
    busca = tkinter.Entry(self.tela)
    busca.place(x=150, y=150, width=200)
    botao_1 = tkinter.Button(
        self.tela, text='Pesquisa Googlis!',
        command=lambda: PageRank.PageRank(busca.get()).rank_it())
    botao_2 = tkinter.Button(self.tela, text='Estou com sorte!')
    botao_1.place(x=130, y=200)
    botao_2.place(x=270, y=200)
def main():
    data = read()
    M = data2matrix(data)
    v = PageRank.pagerank(M, 0.001, 0.85)
    funcDict = {}
    for i in xrange(counter):
        funcDict[id2func[i]] = v[i][0]
    funcDict = OrderedDict(sorted(funcDict.items(), key=lambda t: -t[1]))
    with open(os.path.join(resultFolder, resultFile), 'w') as pageRankFile:
        json.dump(funcDict, pageRankFile, indent=4)
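# The main() above calls PageRank.pagerank(M, 0.001, 0.85) on a column-stochastic link
# matrix M; the module itself is not shown here. A minimal power-iteration sketch with
# the same (matrix, tolerance, damping) signature could look like the following. This is
# an illustrative assumption for readers, not the project's actual implementation.
import numpy as np

def pagerank_sketch(M, eps=0.001, d=0.85):
    """Power iteration on a column-stochastic matrix M until the L1 change drops below eps."""
    n = M.shape[1]
    v = np.ones((n, 1)) / n                 # start from the uniform distribution
    teleport = (1 - d) / n                  # rank mass contributed by random jumps
    while True:
        v_next = d * (M @ v) + teleport     # damped link-following plus teleportation
        if np.abs(v_next - v).sum() < eps:  # converged
            return v_next
        v = v_next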
class SearchEngine:
    def __init__(self):
        indexer = Indexer()
        self.graph = Graph()
        self.crawler = Crawler({"http://mysql12.f4.htw-berlin.de/crawl/d01.html",
                                "http://mysql12.f4.htw-berlin.de/crawl/d06.html",
                                "http://mysql12.f4.htw-berlin.de/crawl/d08.html"},
                               self.graph, indexer)
        self.crawler.crawl()
        self.scorer = Scorer(indexer.index, indexer.documents)
        self.pageRank = PageRank(self.graph)
        self.pageRank.calc()

    def search(self, string, scoreOnly=False):
        query = string.split()
        scores = self.scorer.scoreQuery(query)
        if scoreOnly:
            results = scores
        else:
            results = {}
            for url, score in scores.items():
                results[url] = score * self.graph.get_document(url).rank
        sortedResults = sorted(results.items(), key=operator.itemgetter(1), reverse=True)
        for res in sortedResults:
            print(res)

    def printPageRanks(self):
        print('Page ranks:')
        print(' d01 - d02 - d03 - d04 - d05 - d06 - d07 - d08')
        # print the rounded rank of documents d01..d08 in order, separated by ' - '
        urls = ["http://mysql12.f4.htw-berlin.de/crawl/d0%d.html" % i for i in range(1, 9)]
        ranks = [str(round(self.graph.get_document(url).rank, 4)) for url in urls]
        print(' - '.join(ranks), end='\n\n')
def results():
    if request.method == 'POST':
        MAX_RESULTS = 20
        searchQuery = request.form['SearchQuery']  # get the text entered by the user
        results = PageRank.getRelevantResults(searchQuery, Table_InvertedIndex,
                                              Table_Webpages, MAX_RESULTS)
        return render_template("results.html", results=results)  # pass the list of results for rendering
def run(self, alpha, save_dir=""):
    pg = PageRank(self.itr, self.error, alpha)
    author_resvec = pg.run(self.author_mat, self.author_init, self.author_len)
    self.author.ScoreToName("Author", author_resvec, save_dir + "author_page_rank.txt")
    author_resmap = self.author.getResult()
    paper_resvec = pg.run(self.paper_mat, self.paper_init, self.paper_len)
    self.paper.ScoreToName("self.paper", paper_resvec, save_dir + "paper_page_rank.txt")
    paper_resmap = self.paper.getResult()
    self.venue_resvec = pg.run(self.venue_mat, self.venue_init, self.venue_len)
    self.venue.ScoreToVenue(self.venue_resvec, save_dir + "venue_rank.txt")
    venue_resmap = self.venue.getResult()
    return author_resmap, paper_resmap, venue_resmap
def run(self):
    self.output.config(state=NORMAL)
    self.output.delete(1.0, END)
    try:
        result, htmlPageNames = PageRank.PageRank(float(self.aEntry.get()), currentFile)
        for item in reversed(range(len(result))):
            self.output.insert('1.0', htmlPageNames[item] + ': ' +
                               str(round(float(result[item]), 3)) + '\n')
        # self.output.insert('1.0', result)
        self.output.insert('1.0', 'Importance of pages:' + '\n')
    except:
        self.output.insert('1.0', 'Invalid input.')
    self.output.config(state=DISABLED)
def links(query):
    text = []
    # Wipe out the results from the previous query so they do not carry over into this one.
    f = open('./results.txt', 'w')
    # Insert the styling tag that renders results in FreeSans at 1.2 times the original size, then close the file.
    f.write('<p style="font-family:FreeSans;font-size:120%;color:black">')
    f.close()
    PageRank.search(query)  # computes the results for the query and stores them in "results.txt"
    f = open("./results.txt")
    f2 = open('./numbers.txt')  # contains the total number of results for the user's query
    # Read the results file line by line into the list 'text', one element per line.
    for line in f:
        word = line.strip()
        text.append(word)
    f.close()
    # Read the number of results into the variable 'word2'.
    for line in f2:
        word2 = line.strip()
    f2.close()
    html = ('<p style="font-family:FreeSans;font-size:120%;color:black">' +
            "Your query '" + query + "' returned the following " + word2 + " result(s):<br/>")
    if query.find(' ') == 1:
        html = html + "NOTE: Since you typed MORE THAN ONE keyword, the following results may not be accurate.<br/>"
    html = html + "<br/>"
    html = html + "<br/>".join(text)
    return html  # displays the results to the user
def PageRank(self):
    # Specify the parameters of the PageRank algorithm
    iterations = 20
    initial_pr = 1.0
    if self.dataBaseConnection.cursor():
        # Fetch all links from the persistent file, rank them with PageRank,
        # and store the result in rankedList
        self.cursor.execute('SELECT * FROM Links;')
        myData = self.cursor.fetchall()
        rankedList = PageRank.page_rank(myData, iterations, initial_pr)
        for x in rankedList:
            # Use INSERT OR REPLACE to prevent duplicates
            self.cursor.execute(
                """INSERT OR REPLACE INTO PageRank (doc_id, rank) VALUES('%s', '%s');"""
                % (x, rankedList[x]))
        self.dataBaseConnection.commit()
def experiment_lambda_rounds(airports_hash, airports_sink, results, init_function, init_type):
    for i in range(5, 10):
        lambda_value = i * 0.1
        time1 = time.time()
        init_function(airports_hash)
        iterations = pr.compute_page_ranks(
            airports_hash=airports_hash,
            airports_sink=airports_sink,
            lambda_value=lambda_value,
            init_ranks=init_function)
        time2 = time.time()
        t = time2 - time1
        if init_type not in results:
            results[init_type] = {}
        lambda_str = '{:.1f}'.format(lambda_value)
        results[init_type][lambda_str] = {}
        results[init_type][lambda_str]['iterations'] = iterations
        results[init_type][lambda_str]['time'] = t
        results[init_type][lambda_str]['ranks'] = extract_results_airports(airports_hash)
def __init__(self, layers=None, interLayers=None, weights=None):
    """
    Construct a Mulet with a list of individual layers.
    weights represent the inter-layer edges between different layers.
    :return: None
    """
    if layers is not None:
        self.layers = layers
    if weights is not None:
        self.weights = weights
    if interLayers is not None:
        self.interLayers = interLayers
    self.getGenericGraphfromLayers()
    self.detCumulativeIntraLayerAcceptance()
    self.updateCumulativeAcceptance()
    self.detCumulativeInterLayerAcceptance()
    self.detCumulativeIntraLayerRejectance()
    self.updateCumulativeRejectance()
    self.detCumulativeInterLayerRejectance()
    finIp = InfluencePassivity(filename=None)
    finIp.InfluencePassivityAlgorithm(mygraph=self.g, Avals=self.A_delta, Rvals=self.R_delta)
    with open('/Users/rashmijrao/Documents/IP-master/A1/Scripts2/NS_Final/Influences.json', 'w') as outfile:
        json.dump(finIp.I, outfile)
    with open('/Users/rashmijrao/Documents/IP-master/A1/Scripts2/NS_Final/Passivities.json', 'w') as outfile:
        json.dump(finIp.P, outfile)
    print('Sum I:::', str(max(finIp.I.values())))
    print('Sum P:::', str(max(finIp.P.values())))
    pr = PageRank(directional=True)
    pr.modifyGraph(self.g)
    pr.pageRankAlgorithm(m=10)
    with open('/Users/rashmijrao/Documents/IP-master/A1/Scripts2/NS_Final/Authority.json', 'w') as outfile:
        json.dump(pr.a, outfile)
    with open('/Users/rashmijrao/Documents/IP-master/A1/Scripts2/NS_Final/Hub.json', 'w') as outfile:
        json.dump(pr.h, outfile)
    print('pagerank a:::', max(pr.a.values()))
    print('pagerank h:::', max(pr.h.values()))
def testmethod_5(self):
    result = PageRank.trim_url("http://en.wikipedia.org/wiki/Hotmail")
    self.assertEqual(result, "wikipedia.org")
    (authority, hubness) = HITS.HITS(graph)
    tEnd = time.time()
    timeCost = tEnd - tStart
    with open("output.txt", 'w', encoding='UTF-8') as f:
        f.write("time cost: %f\n" % timeCost)
        f.write("HITS\n")
        f.write(" authority, hubness\n")
        for key in graph:
            f.write("%s: " % key)
            f.write("%f, " % authority[key])
            f.write("%f\n" % hubness[key])
        f.close()
elif method == 'pagerank':
    damp = 0.15
    tStart = time.time()
    pageRank = PageRank.PageRank(graph, damp)
    tEnd = time.time()
    timeCost = tEnd - tStart
    with open("output.txt", 'w', encoding='UTF-8') as f:
        f.write("time cost: %f\n" % timeCost)
        f.write("PageRank\n")
        for key in pageRank:
            f.write("%s: " % key)
            f.write("%f\n" % pageRank[key])
        f.close()
elif method == 'simrank':
    c = 0.8  # decay factor
    tStart = time.time()
    simMatrix = SimRank.SimRank(graph, c)
    tEnd = time.time()
    timeCost = tEnd - tStart
import networkx as nx
import sys

sys.path.append('..')
import PageRank

G = nx.DiGraph()
G.add_nodes_from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
G.add_edge(1, 2)
G.add_edge(1, 4)
G.add_edge(1, 3)
G.add_edge(2, 3)
G.add_edge(4, 3)
G.add_edge(1, 5)
G.add_edge(2, 1)
G.add_edge(5, 2)
G.add_edge(3, 4)
G.add_edge(2, 4)

PR = PageRank.PageRank(G)
PR.constructDispersionMatrix(G)
print(PR.getPageRank())
print(sum(PR.getPageRank()))
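# A possible cross-check for the script above: networkx ships its own PageRank, so the
# custom implementation can be compared against it on the same graph. This is a suggested
# sanity check, not part of the original script; nx.pagerank returns a node->score dict
# whose values sum to 1, so the two results should roughly agree in ranking order.
reference = nx.pagerank(G, alpha=0.85)
print(sorted(reference.items(), key=lambda kv: kv[1], reverse=True))  # nodes ranked by reference score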
def main():
    WebCrawler.main()
    Indexer.main()
    PageRank.main()
        return 1
    return count

for s in range(len(sentences)):
    if s > 0:
        adjMax[s][s - 1] += findRelation(sentences[s]) * 0
    for ss in range(len(sentences)):
        if s != ss:
            adjMax[s][ss] += findOverlap(sentences[s], sentences[ss])

# for m in adjMax:
#     print " ".join([str(i) for i in m])

G = np.array(adjMax)
rank = PageRank.pageRank(G, s=0.5)
l = []
ind = 0
for r in rank:
    l.append((r, ind))
    ind += 1
l.sort()
l.reverse()
print words_used
# print all important sentences
for k in l:
    print sentences[k[1]]
    print k[0]
    print "==============="
def test_calc(self):
    self.assertTrue(P.get_weight())
'''
import numpy as np

data = open('E:\\learning\\WebDataMining\\hw\\aan\\release\\2014\\venue_test.txt')

# patterns for the venue
p1 = r"(?<=id\s=\s{).+?(?=})"
pattern1 = re.compile(p1)
p2 = r"(?<=venue\s=\s{).+?(?=})"
pattern2 = re.compile(p2)

# venue
venue = MapToMatrix('E:\\learning\\WebDataMining\\hw\\aan\\release\\2014\\')
venue.FileToList_Reg('venue_test.txt', pattern1, pattern2)
venue_len = venue.getLen()
venue_mat = venue.SwitchMap('paper_venue_test.txt')
venue_init = np.ones(venue_len)
pg = PageRank(50, 0.0001, 0.6)
venue_resvec = pg.run(venue_mat, venue_init, venue_len)
venue.ScoreToVenue(venue_resvec, "exp_venue_rank.txt")
venue_resmap = venue.getResult()
venue_topn = topN(3, venue_resmap)
sum_error = compareDiff(venue_topn, venue_topn)
print sum_error
# alpha=0.6
# topn=10
# print "when alpha=%f, the top %d is \n" % (alpha, topn)
def __init__(self, fichier, findParametre=False, metrique="FMesure", tailleTrain=0.65, verbose=False):
    """
    Initializes EvalAllIRModel.

    :type fichier: String
    :param fichier: the file where the queries are stored
    :type findParametre: boolean
    :param findParametre: enables parameter optimization
    :type metrique: String
    :param metrique: the metric to use
    :type tailleTrain: float
    :param tailleTrain: the proportion of the queries used for training
    :type verbose: boolean
    :param verbose: enables verbose mode
    """
    collection = Parser.Parser.buildDocCollectionSimple(fichier + '.txt', pageRank=True)
    self.collectionQry = Parser.Parser.buildQueryCollection(fichier)
    self.train = dict()
    self.verbose = verbose
    self.print_verbose("Initialization")
    if findParametre:
        self.separeTrainTest(tailleTrain)
    index = IndexerSimple.IndexerSimple()
    index.indexation(collection)
    self.weighter = [
        Weighter.Weighter1(index),
        Weighter.Weighter2(index),
        Weighter.Weighter3(index),
        Weighter.Weighter4(index),
        Weighter.Weighter5(index)
    ]
    modelIR = [IRModel.Vectoriel, IRModel.Jelinek_Mercer, IRModel.Okapi]
    model = []
    for w in self.weighter:
        for m in range(len(modelIR)):
            if m == 1:  # Jelinek_Mercer model
                jelinek = modelIR[m](w)
                if findParametre:
                    jelinek.findParametreOptimaux(np.arange(0, 1.4, 0.1), self.train, metrique)
                model.append(jelinek)
            elif m == 2:  # Okapi model
                okapi = modelIR[m](w)
                if findParametre:
                    okapi.findParametreOptimaux(np.arange(0, 0.5, 0.1),
                                                np.arange(1.5, 2, 0.1),
                                                self.train, metrique)
                model.append(okapi)
            else:  # Vectoriel model: there are no parameters to optimize
                model.append(modelIR[m](w))
                model.append(modelIR[m](w, True))
    if findParametre:
        self.print_verbose("All Jelinek_Mercer and Okapi models are trained")
    self.model = []
    for m in model:
        self.model.append(EvalIRModel(self.collectionQry, m))
        pr = PageRank.PageRank(m.getWeighter(), m)
        if findParametre:
            pr.findParametreOptimaux(np.arange(0.85, 0.95, 0.05), self.train, metrique)
        self.model.append(EvalIRModel(self.collectionQry, pr))
    if findParametre:
        self.print_verbose("All PageRank models are trained")
    self.print_verbose("Initialization complete")
def leave_one_out(function, diseaseGeneFilePath, PPI_Network, param):
    print("Starting leaveOneOut function")

    # building list of disease genes
    diseaseGeneFile = open(diseaseGeneFilePath, 'r')
    allDiseaseGenes = diseaseGeneFile.read().splitlines()
    diseaseGeneFile.close()
    # print(allDiseaseGenes)
    numDiseaseGenes = len(allDiseaseGenes)
    rankThreshhold = 150
    numGenesNotFound = 0
    degree_list = []  # remove after graph is made (kate)
    in_out_list = []  # remove after graph is made (kate)
    graph_nodes = list(PPI_Network.nodes())

    startVector = load_start_vector(diseaseGeneFilePath, PPI_Network)
    startVector = (numDiseaseGenes / (numDiseaseGenes - 1)) * startVector

    # skipping
    for index, skipGene in enumerate(allDiseaseGenes):
        # find the skip gene in the start vector, make it zero
        index = graph_nodes.index(skipGene)
        node_degree = PPI_Network.degree(skipGene)  # remove after graph is made (kate)
        degree_list.append(node_degree)  # remove after graph is made (kate)
        newStartVector = startVector.copy()
        newStartVector[index] = 0
        # startVector[index] = 0
        priors_vector = np.zeros(PPI_Network.number_of_nodes())
        if function == pr.page_rank:
            priors_file_path = find_priors_file(diseaseGeneFilePath)
            priors_vector = pr.load_priors(priors_file_path, PPI_Network)
            priors_vector[index] = 0

        # run algorithm using modified disease gene file
        startTime = time.time()
        output = []
        if function == pr.page_rank:
            output = function(PPI_Network, newStartVector, priors_vector, param)
        else:
            print("sum of start vector:", np.sum(startVector))
            output = function(PPI_Network, newStartVector, param)
        endTime = time.time()
        print("finished algorithm. Time elapsed:", endTime - startTime)

        # find the predicted probability of the omitted gene and add it to the current sum
        startTime = time.time()
        foundGene = False
        for i in range(rankThreshhold):
            if output[i][0] == skipGene:
                foundGene = True
                print("Found the gene: ", skipGene, "at rank: ", i)
                in_out_list.append(1)  # remove after graph is made (kate)
                break
        if not foundGene:
            numGenesNotFound += 1
            in_out_list.append(-1)  # remove after graph is made (kate)
        endTime = time.time()

    # write the results of leave one out to a file
    disease_name = diseaseGeneFilePath.split(".")[0]
    output_name = "leave_one_out_1" + disease_name[5:]
    if function == pr.page_rank:
        output_name = output_name + "_pr.tsv"
    elif function == dk.diffusion_kernel:
        output_name = output_name + "_dk.tsv"
    elif function == rwr.random_walk:
        output_name = output_name + "_rwr.tsv"
    with open(output_name, "w") as output:
        for i in range(len(allDiseaseGenes)):
            output_string = allDiseaseGenes[i] + "\t" + str(degree_list[i]) + "\t" + str(in_out_list[i]) + "\n"
            output.write(output_string)

    print("------------------------\n"
          "Finished running algorithm with all disease genes left out\n"
          "Calculating mean squared difference")
    print("Num genes not found for this run of leave one out: ", numGenesNotFound)

    # Find average of all squared differences
    percentCorrectlyRankedGenes = 1 - numGenesNotFound / numDiseaseGenes
    return percentCorrectlyRankedGenes
def graphAnalyzer(graph, kmeans=False):
    """Arguments: the path to a .gml graph file, and a boolean: if True, cluster with
    scikit-learn's KMeans, otherwise use our dbscan algorithm.
    Ranks and clusters the nodes in order to return the highest-PageRank page in the
    three biggest clusters. Also draws a graph to visualize the clustering."""
    G = nx.read_gml(graph)
    G = removeIsolatedNodes(G)  # removing meaningless nodes
    G.remove_node(list(G.nodes)[0])

    # ----------------------------------- PageRank Computation --------------------------------------
    # creating a PageRank object
    pr = PageRank.PageRank(G)
    pr.constructDispersionMatrix(G)
    pr = pr.getPageRank()

    # ----------------------------------- Clustering Computation --------------------------------------
    # constructing the network layout
    forceatlas2 = fa2.ForceAtlas2(
        # Behavior alternatives
        outboundAttractionDistribution=False,  # Dissuade hubs
        linLogMode=False,  # NOT IMPLEMENTED
        adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
        edgeWeightInfluence=0,
        # Performance
        jitterTolerance=.01,  # Tolerance
        barnesHutOptimize=True,
        barnesHutTheta=1.2,
        multiThreaded=False,  # NOT IMPLEMENTED
        # Tuning
        scalingRatio=1,
        strongGravityMode=True,
        gravity=200,
        # Log
        verbose=True)
    pos = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=1000)

    if kmeans:
        # converting positions into a list of np.array
        pos_list = [np.array([elt[0], elt[1]]) for key, elt in pos.items()]
        # clustering the nodes according to the kmeans algorithm
        clusters = Kmeans.kmeans(pos_list, 8, 0.01, 300)
    else:
        pos = {key: np.array([elt[0], elt[1]]) for key, elt in pos.items()}
        pos_transf = dbscan.transf(pos)  # changing the position format to be usable by DBSCAN
        clusters = dbscan.dbscan(pos_transf, 40, 20)  # clustering

    cluster_with_pr = associatingPageRankToNode(pr, clusters)
    # sorting each cluster according to the PageRank result
    for key, value in cluster_with_pr.items():
        cluster_with_pr[key] = sorted(value, key=lambda item: (item[1], item[0]))

    # rendering the suggested pages and their PageRank
    print("\nThe recommended pages are the following:")
    for key, value in cluster_with_pr.items():
        try:
            node_index = value[-1][0]  # retrieving the node index
            title_node = re.search(r'titles=(.*?)\&',
                                   list(G.nodes())[node_index])  # getting the title of the Wikipedia page
            print("•", title_node.group(1), "- with a page rank of ", value[-1][1])
        except IndexError:
            pass

    # ----------------------------------- Graph Creation --------------------------------------
    # each node within a cluster has the same color
    get_colors = lambda n: list(map(lambda i: "#" + "%06x" % random.randint(0, 0xFFFFFF), range(n)))
    colors = get_colors(len(clusters.keys()) + 1)
    node_color = ['black' for _ in range(len(G.nodes()))]
    for key, value in clusters.items():
        for elt in value:
            node_color[elt] = colors[key]
    nx.draw(G.to_undirected(), pos, node_size=2, width=.05, edge_color='grey', node_color=node_color)
    plt.savefig("graph_with_layout.png")
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: ./TopicSensitivePageRank.py <file to read json data(tweets)>"
        sys.exit(1)
    filename = sys.argv[1]
    f = file(filename, "r")
    tweets = f.readlines()
    # final_list = findMostPopularWords(tweets)
    dict_buckets = staticDictOfBuckets()
    tagged_dict = taggedUsers_dict(tweets, dict_buckets)
    # print tagged_dict
    pageRank = PageRank()
    global_dict = pageRank.create_dictionary(tweets)
    idToUserMap = pageRank.map_IDtoUsername(tweets)
    updated_dict = update_dictionary(global_dict, tagged_dict)
    for tag in updated_dict:
        print tag, len(updated_dict[tag])
    # print updated_dict_social
    final_dict_social = pageRank.update_dictionary(updated_dict['technology'])
    print final_dict_social
    final_list_social = sorted(final_dict_social.items(), key=lambda x: x[1], reverse=True)
__author__ = 'Ariel'

import numpy as np
import time
import readHelper
import writeHelper
import PageRank

start_time = time.time()

# get teleportation matrix
m = readHelper.getSparseMatrix('transition.txt', True)

# global PageRank
globalPR, outGPR = PageRank.pagerank(m, 0.1)

# out-line link injection for topic-sensitive PageRank
topic = readHelper.getSparseMatrix('doc-topics.txt', False).transpose()
tspr, outTSPR = PageRank.topicSensitivePageRank(m, topic, 0.25, 0.65)

# query topic-sensitive PageRank
queryTopic, queryDistr = readHelper.getDistro('query-topic-distro.txt')
outQTSPR = PageRank.OnlineTopicSensitivePR(outTSPR, queryDistr[queryTopic[(2, 2)]])
queryTopicPR = PageRank.OnlineTopicSensitivePR(tspr, queryDistr)

# user topic-sensitive PageRank
userTopic, userDistr = readHelper.getDistro('user-topic-distro.txt')
outPTSPR = PageRank.OnlineTopicSensitivePR(outTSPR, userDistr[userTopic[(2, 2)]])
userTopicPR = PageRank.OnlineTopicSensitivePR(tspr, userDistr)
if test_matrix:
    P = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]])
    node_count = P.shape[0]
    x = np.ones(node_count)
    weights, iter_count = PageRank.power_iter_matrix(x, graph=P, epsilon=1e-20, d=0.85, max_iter=1000)
    print("Weights:")
    print(weights)
    print("Iteration count:")
    print(iter_count)
else:
    P = None
    # P = {(1,2), (2,1), (3,0), (3,1), (4,3), (4,1), (4,5), (5,1), (5,4), (6,1), (6,4), (7,1), (7,4), (8,1), (8,4), (9,4), (10,4)}
    # node_count = 11
    with open(file_name, 'r') as f:
        line_count = sum(1 for line in f)
        P = np.zeros(line_count, dtype='int32, int32')
        f.seek(0)
        for i, line in enumerate(f):
def summarise(filepath, co_ref=1, page_rank=True, debug_output=True, num_words=200, overlap=True):
    if not os.path.isdir("stanford-corenlp"):
        print >> sys.stderr, "Please put the Stanford CoreNLP package into the stanford-corenlp directory."
        quit()
    filename = filepath.split("/")[-1]
    if not os.path.isfile("stanford-corenlp/" + filename + ".xml"):
        shutil.copyfile(filepath, "stanford-corenlp/" + filename)
        if os.name == "nt":
            os.system("cd stanford-corenlp && java -cp stanford-corenlp-3.2.0.jar;stanford-corenlp-3.2.0-models.jar;xom.jar;joda-time.jar;jollyday.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -file " + filename)
        else:
            os.system("cd stanford-corenlp && java -cp stanford-corenlp-3.2.0.jar:stanford-corenlp-3.2.0-models.jar:xom.jar:joda-time.jar:jollyday.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -file " + filename)
    sentences, coref = splitAndParse.splitSentencesAndParse("stanford-corenlp/" + filename + ".xml")
    if debug_output:
        print "coref", coref
    adjMax = [[0 for s in sentences] for s in sentences]
    if co_ref:
        for co in coref:
            dic = {}
            for s in co:
                if s in dic:
                    dic[s] += 1
                else:
                    dic[s] = 1
            for s in dic:
                for ss in dic:
                    if s != ss:
                        if co_ref == 1:
                            adjMax[s][ss] += (dic[s] + 0.0) / dic[ss]
                        elif co_ref == 2:
                            adjMax[s][ss] += (dic[ss] + 0.0) / dic[s]
                        elif co_ref == 3:
                            adjMax[s][ss] += dic[ss] + dic[s]
                        elif co_ref == 4:
                            adjMax[s][ss] += dic[ss] * dic[s]
                        elif co_ref == 5:
                            adjMax[s][ss] += (dic[ss] + dic[s]) * 2
                        elif co_ref == 6:
                            adjMax[s][ss] += (dic[ss] + dic[s]) * 5
    words_used = set()
    if debug_output:
        print "before overlap"
        for m in adjMax:
            print " ".join([str(i) for i in m])
    if overlap:
        for s in range(len(sentences)):
            for ss in range(len(sentences)):
                if s != ss:
                    adjMax[s][ss] += findOverlap(sentences[s], sentences[ss], words_used)
    if debug_output:
        print "after overlap"
        for m in adjMax:
            print " ".join([str(i) for i in m])
    l = []
    scores = []
    if page_rank:
        G = np.array(adjMax)
        scores = PageRank.zeroToOne(G, s=0.5)
    else:
        scores = [sum(row) for row in adjMax]
    ind = 0
    for s in scores:
        l.append((s, ind))
        ind += 1
    l.sort()
    l.reverse()
    if debug_output:
        print words_used
        # print all important sentences
        for k in l:
            print sentences[k[1]]
            print k[0]
            print "==============="
    best_first = cutoff_words(l, sentences, 200, adjMax)
    by_order = []
    for i in range(0, len(best_first)):
        by_order.append((l[i][1], best_first[i]))
    by_order.sort()
    return [s[1] for s in by_order]
from PageRank import *
import time

start_time = time.time()

##data_file = "Datasets/sx-mathoverflow.txt"
data_file = "Datasets/Wiki-Vote.txt"
##data_file = "Datasets/test.txt"

is_page_no_zero_indexed = False
epsilon = 0.00001
max_iterations = 10
beta = 0.85
display_network_after_each_iteration = True
max_no_of_nodes_to_show = 20

PgRank = PageRank(data_file, is_page_no_zero_indexed, max_iterations, beta, epsilon,
                  display_network_after_each_iteration)

print("Rank Vector:")
for i in PgRank.rank_vector[:max_no_of_nodes_to_show]:
    print(i)
PgRank.display_network(PgRank.rank_vector, max_no_of_nodes_to_show)

if is_page_no_zero_indexed:
    teleport_set = [i for i in PgRank.matrix if i % 500 == 0]
else:
    teleport_set = [i + 1 for i in PgRank.matrix if i % 500 == 0]

topic_specific_rank_vector = PgRank.topic_specific_page_rank(teleport_set)
print("Topic Specific Rank Vector:")
for i in topic_specific_rank_vector[:max_no_of_nodes_to_show]:
    print(i)

end_time = time.time()
def testmethod_1(self):
    result = PageRank.search("youtube")
    self.assertNotEqual(len(result), 0, "fail")
listS5, best_avg5 = clique.find_densest_subgraph(G)
end5 = time.clock()
time5 = end5 - beg5
L4 = []
for x in range(0, len(listS5)):
    for y in listS5[x]:
        L4.append(y)
L4 = list(set(L4))
# print L4
recall_Clique = len(list(golden_keywords.intersection(L4))) / float(len(golden_keywords))
precision_Clique = len(list(golden_keywords.intersection(L4))) / float(len(L4))

# PageRank
beg7 = time.clock()
list7 = pr.find_densest_subgraph(G, 10)
end7 = time.clock()
time7 = end7 - beg7
temp = [list7[i][0] for i in range(len(list7))]
recall_PageRank = len(list(golden_keywords.intersection(temp))) / float(len(golden_keywords))
precision_PageRank = len(list(golden_keywords.intersection(temp))) / float(len(temp))

# Using the function from Networkx: K-Cores
beg4 = time.clock()
G.remove_edges_from(G.selfloop_edges())
S4 = nx.k_core(G)
end4 = time.clock()
time4 = end4 - beg4
temp = S4.nodes()
def test_influence():
    resIGFG = DetectionCommunautes.DetectionComIG(gTwitter, "FastGreedy")
    resIGIM = DetectionCommunautes.DetectionComIG(gTwitter, "InfoMap")
    resSNAPBC = DetectionCommunautes.DetectionComSNAP(gTwitter, "BigClam", path + 'snap/', path,
                                                      'outputGraph.txt', 'outputAlgo.txt')
    resSNAPIM = DetectionCommunautes.DetectionComSNAP(gTwitter, "InfoMap", path + 'snap/', path,
                                                      'outputGraph.txt', 'outputAlgo.txt')
    resSNAPCPM = DetectionCommunautes.DetectionComSNAP(gTwitter, "CPM", path + 'snap/', path,
                                                       'outputGraph.txt', 'outputAlgo.txt')
    NomComFG = InfluenceCommunautes.NomsCommunautes(resIGFG['membership'], gTwitter)
    NomComIM = InfluenceCommunautes.NomsCommunautes(resIGIM['membership'], gTwitter)
    assert type(NomComFG['comNoms']) == dict
    assert type(NomComFG['comNode']) == dict
    assert len(NomComFG['comNoms']) != 0
    assert len(NomComFG['comNode']) != 0
    assert type(NomComIM['comNoms']) == dict
    assert type(NomComIM['comNode']) == dict
    assert len(NomComIM['comNoms']) != 0
    assert len(NomComIM['comNode']) != 0
    IGgraph1 = PageRank.getIGraph(
        'MATCH (n:`andra-user`)<-[r:`andra-from`]-(t1:`andra-tweet`)-[re:`andra-retweet`]->(t2:`andra-tweet`)-[rel:`andra-from`]->(p:`andra-user`) WHERE t2.language="fr" RETURN n as nodeFrom,p as nodeTo',
        grapheNeo, "192.168.1.75:7474", "neo4j", "pass4dbse")
    PRank1 = PageRank.Rank(IGgraph1, 10)
    InfluFG = InfluenceCommunautes.InfluenceCommunautes(
        grapheNeo, gTwitter, PRank1['resPR'], NomComFG['comNode'], 0.2, 10)
    InfluIM = InfluenceCommunautes.InfluenceCommunautes(
        grapheNeo, gTwitter, PRank1['resPR'], NomComIM['comNode'], 0.2, 10)
    InfluBC = InfluenceCommunautes.InfluenceCommunautes(
        grapheNeo, gTwitter, PRank1['resPR'], resSNAPBC['comCodes'], 0.2, 10)
    InfluIMS = InfluenceCommunautes.InfluenceCommunautes(
        grapheNeo, gTwitter, PRank1['resPR'], resSNAPIM['comCodes'], 0.2, 10)
    InfluCPM = InfluenceCommunautes.InfluenceCommunautes(
        grapheNeo, gTwitter, PRank1['resPR'], resSNAPCPM['comCodes'], 0.2, 10)
    assert type(InfluFG) == dict
    assert type(InfluIM) == dict
    assert type(InfluBC) == dict
    assert type(InfluIMS) == dict
    assert type(InfluCPM) == dict
    InfluHTFG = InfluenceCommunautes.InfluenceHashtags(grapheNeo, NomComFG['comNode'], 0.2)
    InfluHTIM = InfluenceCommunautes.InfluenceHashtags(grapheNeo, NomComIM['comNode'], 0.2)
    InfluHTBC = InfluenceCommunautes.InfluenceHashtags(grapheNeo, resSNAPBC['comCodes'], 0.2)
    InfluHTIMS = InfluenceCommunautes.InfluenceHashtags(grapheNeo, resSNAPIM['comCodes'], 0.2)
    InfluHTCPM = InfluenceCommunautes.InfluenceHashtags(grapheNeo, resSNAPCPM['comCodes'], 0.2)
    assert type(InfluHTFG) == dict
    assert type(InfluHTIM) == dict
    assert type(InfluHTBC) == dict
    assert type(InfluHTIMS) == dict
    assert type(InfluHTCPM) == dict
    assert len(InfluHTFG) == len(NomComFG['comNode'])
    assert len(InfluHTIM) == len(NomComIM['comNode'])
    assert len(InfluHTBC) == len(resSNAPBC['comNoms'])
    assert len(InfluHTIMS) == len(resSNAPIM['comNoms'])
    assert len(InfluHTCPM) == len(resSNAPCPM['comNoms'])
    InflueTweeFG = InfluenceCommunautes.InfluenceTweets(NomComFG['comNode'])
    InflueTweeIM = InfluenceCommunautes.InfluenceTweets(NomComIM['comNode'])
    InflueTweeBC = InfluenceCommunautes.InfluenceTweets(resSNAPBC['comCodes'])
    InflueTweeIMS = InfluenceCommunautes.InfluenceTweets(resSNAPIM['comCodes'])
    InflueTweeCPM = InfluenceCommunautes.InfluenceTweets(resSNAPCPM['comCodes'])
    assert type(InflueTweeFG) == dict
    assert type(InflueTweeIM) == dict
    assert type(InflueTweeBC) == dict
    assert type(InflueTweeIMS) == dict
    assert type(InflueTweeCPM) == dict
    assert len(InflueTweeFG) == len(NomComFG['comNode'])
    assert len(InflueTweeIM) == len(NomComIM['comNode'])
    assert len(InflueTweeBC) == len(resSNAPBC['comNoms'])
    assert len(InflueTweeIMS) == len(resSNAPIM['comNoms'])
    assert len(InflueTweeCPM) == len(resSNAPCPM['comNoms'])
    print "INFLUENCE OK"
    meanDegree = (2 * numberOfEdges) / numberOfNodes
    return meanDegree / (numberOfNodes - 1)


# deleted 5,6,7,11
# 8=5, 9=6, 10=7, 12=8, 13=9, 14=10, 15=11, 16=12, 17=13, 18=14
Friends = [[2], [1, 4, 9, 11], [4], [2, 3], [6], [5], [8], [7, 9, 10], [2, 8, 10],
           [8, 9], [2, 12], [11], [14], [13]]

# deleted 5,6,7,11
# 8=5, 9=6, 10=7, 12=8, 13=9, 14=10, 15=11, 16=12, 17=13, 18=14
FirstNames = [[2, 4, 8], [1, 4, 9, 11], [4], [1, 2, 3, 13, 14], [6], [5], [8, 13],
              [1, 7, 9, 10], [2, 8, 10], [8, 9, 14], [2, 12], [11], [4, 7, 14], [4, 10, 13]]

# deleted 1,5,8,9,11,15,16
# 2=1, 3=2, 4=3, 6=4, 7=5, 10=6, 12=7, 13=8, 14=9, 17=10, 18=11
HaveClass = [[3, 8], [3], [1, 2, 10, 11], [5], [4], [7], [6, 8], [1, 7, 11], [11],
             [3, 11], [3, 8, 9, 10]]

# deleted 3,5,6,7,11,14,15,16
# 4=3, 8=4, 9=5, 10=6, 12=7, 13=8, 17=9, 18=10
SocialEvents = [[2], [1], [9], [5], [4], [7, 9], [6, 8], [7], [3, 6, 10], [9]]

# print(HW1.out_degree(Friends))
# print(HW1.in_degree(Friends))
PR.PageRank(Friends)
PR.PageRank(FirstNames)
PR.PageRank(HaveClass)
PR.PageRank(SocialEvents)
def testmethod_6(self):
    result = PageRank.search("ruosyguweryiotgryu")
    self.assertEqual(len(result), 1, "fail")
def testmethod_3(self):
    result = PageRank.search("shit")
    self.assertNotEqual(len(result), 0, "fail")
print("Loading tf-idf") tfidf = calculateTFIDF(invertedIndex, maxCount, N) # doIndexingTfIDF(tfidf) # doIndexingPreprocessedWord() # doIndexingDocuments() # doIndexingPR() # tfidf = fetchIndexingTfIDF(tfidf) # cleanWords = fetchIndexingPreprocessedWord() # docCounterList = fetchIndexingDocuments() # pageranks = fetchIndexingPR() # calculate pagerank print("Loading pagerank") pageranks = pr.compute(linksDocs) # load any embeddings of choice print("Loading word embeddings") embeddings_dict = utils.load_glove() # embeddings_dict = gensim.models.KeyedVectors.load_word2vec_format('Embeddings\GoogleNews-vectors-negative300.bin', binary=True) #list of predefined queries queries = [ "Professors who teach NLP at UIC", "Student organizations at uic", "Student Orientation at UIC", "How has coronavirus affected UIC", "Centers for Cultural Understanding and Social Change" ] # press enter to validate pre-defined queries # otherwise input your query query = input(
def testmethod_5(self):
    result = PageRank.search("")
    self.assertEqual(len(result), 1, "fail")
def main(): print("Starting AUROC..") #Get file path choices pathToPPINetworkFile = sys.argv[1] #pathToPPINetworkFile = 'Data/9606.protein.links.v11.0.txt' # Get output vectors from each algorithm PPI_Network = compute_if_not_cached(loader.load_graph, pathToPPINetworkFile, fileName=pathToPPINetworkFile) ground_truth_files = [ 'Data/MalaCard-protein-Endometriosis.diseasegenes.tsv', 'Data/MalaCard-protein-ischaemic-stroke.diseasegenes.tsv', 'Data/MalaCard-protein-lymphoma.diseasegenes.tsv' ] file_paths = [ 'Data/endometriosis-proteins.diseasegenes.tsv', 'Data/lymphoma-proteins.diseasegenes.tsv', 'Data/ischaemic-proteins.diseasegenes.tsv' ] prior_paths = [ 'Data/endometriosis-proteins.priors.tsv', 'Data/lymphoma-proteins.priors.tsv', 'Data/ischaemic-proteins.priors.tsv' ] names = ['endometriosis', 'lymphoma', 'ischaemic'] for i in range(1, 3): # building ground truth ground_truth_vec = [] with open(ground_truth_files[i], 'r') as input_file: input_file = input_file.readlines() for line in input_file: protein = line.rstrip('\n') ground_truth_vec.append(protein) gene_file = open(file_paths[i], 'r') file_contents = list(gene_file.readlines()) # print(file_contents) for line in file_contents: protein = line.rstrip('\n') if protein not in ground_truth_vec: ground_truth_vec.append(protein) gene_file.close() print(ground_truth_vec) # building start and priors vector start_vector = loader.load_start_vector(file_paths[i], PPI_Network) priors_vector = pr.load_priors(prior_paths[i], PPI_Network) #getting output from algorithms start_time = time.time() output_RWR = rwr.random_walk(PPI_Network, start_vector) end_time = time.time() print("time for rwr:", end_time - start_time) start_time = time.time() output_PR = pr.page_rank(PPI_Network, start_vector, priors_vector) end_time = time.time() print("time for pr:", end_time - start_time) start_time = time.time() output_DK = dk.diffusion_kernel(PPI_Network, start_vector) end_time = time.time() print("time for dk:", end_time - start_time) #building roc curves start_time = time.time() name = "rwr-" + names[i] rwr_curve = roc_curve(output_RWR, ground_truth_vec, name) end_time = time.time() print("time for roc curve, rwr:", end_time - start_time) start_time = time.time() name = "pr-" + names[i] pr_curve = roc_curve(output_PR, ground_truth_vec, name) end_time = time.time() print("time for roc curve, pr:", end_time - start_time) start_time = time.time() start_time = time.time() name = "dk-" + names[i] dk_curve = roc_curve(output_DK, ground_truth_vec, name) end_time = time.time() print("time for roc curve, dk:", end_time - start_time) file_path = 'Results/' + names[i] + 'roc_curve.png' plt.ylabel('TPR') plt.xlabel('FPR') plt.title(names[i]) plt.legend(loc='lower right') plt.savefig(file_path) #moved from roc_curve plt.clf() #moved from roc_curve print("Plots have been saved as png files in the Results folder.")