Exemplo n.º 1
0
# Register the -d/--useDocs switch and parse the command-line arguments.
# (The parser itself, and the runLDA option read below, are set up earlier in the script.)
parser.add_argument("-d", "--useDocs", action="store_true", help="Use document database to find relevant authors")
args = parser.parse_args()

# NOTE(review): averagePrecisionN and ns are not used in this part of the script;
# presumably the cut-off for average precision and the result-list sizes to evaluate — confirm.
averagePrecisionN = 20 
# ns holds the values 5, 10, ..., 50.
ns = numpy.arange(5, 55, 5)
# runLSI is the negation of the runLDA option.
runLSI = not args.runLDA
# Passing --useDocs switches knownAuthors off.
knownAuthors = not args.useDocs

# Build the dataset object from the two switches derived above.
dataset = ArnetMinerDataset(runLSI=runLSI, knownAuthors=knownAuthors) 
# Alternative input files, currently disabled:
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-1000000.txt"
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-5000000.txt"
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-7000000.txt"
# Input file actually used, located inside the dataset's own data directory.
dataset.dataFilename = dataset.dataDir + "DBLP-citation-Feb21.txt" 
# NOTE(review): the meaning of the settings below is defined by ArnetMinerDataset, which is
# not visible here. minDf/minDfs look like minimum document frequencies and ks like the
# candidate model sizes tried during model selection — confirm against the class.
dataset.minDf = 10**-4
dataset.ks = [100, 200, 300, 400, 500, 600]
dataset.minDfs = [10**-3, 10**-4]
# All three overwrite flags are switched on; presumably this forces the graph, model and
# vectoriser to be regenerated rather than reused — TODO confirm.
dataset.overwriteGraph = True
dataset.overwriteModel = True
dataset.overwriteVectoriser = True 

# Model selection only runs when knownAuthors is off, i.e. when --useDocs was given.
if not knownAuthors: 
    dataset.modelSelection()

# Save the output list and the graph.
# Both files are opened for writing at hard-coded paths under //home//idexlab//.
# They are not closed within this section, so the code that follows must close them.
fich = open("//home//idexlab//" + "TotalMeasuresBM25" + ".txt", "w")
fgraph = open("//home//idexlab//" + "GraphTotal" + ".txt", "w")

for field in dataset.fields: 
    logging.debug("Field = " + field)
    if not knownAuthors: