Exemplo n.º 1
0
    # Merge the two candidate author collections into one de-duplicated set.
    # (Per the variable names, one presumably comes from document similarity
    # and the other from citations -- confirm against the code above.)
    relevantAuthors = set(relAuthorsDocSimilarity).union(set(relAuthorsDocCitations))
    logging.debug("Total number of relevant authors : " + str(len(relevantAuthors)))
    
    # Ask the dataset for the co-author graph of this field together with the
    # indexer used below to translate authors into indices.
    graph, authorIndexer = dataset.coauthorsGraph(field, relevantAuthors)
    # --- Disabled debugging code: loaded the pickled co-author data and dumped
    # --- it (plus the indexer's id dict and edge weights) to `fgraph`.
    #f = open(dataset.getCoauthorsFilename(field),'rb') #Save the graph of co-authors
    #storedlist=pickle.load(f)
    #for i in storedlist:
        #fgraph.write(str(i))
        #fgraph.write("\n"+ "Author vertices") 
        #fgraph.write(str(authorIndexer.getIdDict()))
        #fgraph.write(str(numpy.ones(graph.ecount())))
        #print(i)
    #fgraph.close()
    # Separate indexer built from `expertAuthors` (the BM25 expert list, per the
    # original comment). NOTE(review): `graph1` is not used anywhere in the
    # visible part of this function.
    graph1, authorIndexer1 = dataset.GetIndexer(field, expertAuthors) #Get Index for the expert list BM25
    # Match the relevant authors against the training and test expert entries
    # stored for this field.
    trainExpertMatches = dataset.matchExperts(relevantAuthors, dataset.trainExpertDict[field])   
    testExpertMatches = dataset.matchExperts(relevantAuthors, dataset.testExpertDict[field])     
    
    # Translate every author collection through the co-author indexer.
    trainExpertMatchesInds = authorIndexer.translate(trainExpertMatches)
    testExpertMatchesInds = authorIndexer.translate(testExpertMatches) 
    relevantAuthorInds1 = authorIndexer.translate(relAuthorsDocSimilarity) 
    relevantAuthorInds2 = authorIndexer.translate(relAuthorsDocCitations) 
    relevantAuthorsInds = authorIndexer.translate(relevantAuthors)  
    # NOTE(review): translated with `authorIndexer1`, not `authorIndexer`, so
    # these values are presumably in a different index space from the ones
    # above -- confirm before comparing or mixing them.
    expertAuthorsInds = authorIndexer1.translate(expertAuthors)#Get Ids our BM25 List
    
    # Sanity checks: every index from each source collection must be smaller
    # than the number of translated relevant authors. These are plain `assert`
    # statements, so they are skipped when Python runs with -O.
    assert (numpy.array(relevantAuthorInds1) < len(relevantAuthorsInds)).all()
    assert (numpy.array(relevantAuthorInds2) < len(relevantAuthorsInds)).all()
    
    if len(testExpertMatches) != 0:
        fich.write(field)
        fich.write("\n")