def main():
    if len(sys.argv) != 2:
        print('Python LePrEF 2015')
        print('Usage: python3 %s argv[1]' % sys.argv[0])
        print('argv[1]: Execution configuration file')
        print('\tJson Execution configuration file must have the attributes below:')
        print('\t {')
        print('\t ftrainame:\t\ttrain file name,')
        print('\t fvaliname:\t\tvalidation file name,')
        print('\t ftestname:\t\ttest file name,')
        print('\t ffiltersresults:\tfilters result file name,')
        print('\t foutputname:\tAOL output file name,')
        print('\t resultdir:\t\tOut directory for results,')
        print('\t randomseed:\t\tRandom Seed,')
        print('\t popsize:\t\tPopulation size,')
        print('\t generations:\tGenerations number,')
        print('\t cxprob:\t\tCrossover Probability,')
        print('\t mutprob:\t\tMutation Probability,')
        print('\t hoflen:\t\tHall of Fame length')
        print('\t }')
        return 1

    # Load config file
    fexecname = sys.argv[1]
    exec_config = load_config_file(fexecname)
    ftrainame = exec_config['ftrainame']
    fvaliname = exec_config['fvaliname']
    ftestname = exec_config['ftestname']
    ffiltersresults = exec_config['ffiltersresults']
    foutputname = exec_config['foutputname']
    resultdir = exec_config['resultdir']
    randomseed = int(exec_config['randomseed'])
    popsize = int(exec_config['popsize'])
    generations = int(exec_config['generations'])
    cxprob = float(exec_config['cxprob'])
    mutprob = float(exec_config['mutprob'])
    hoflen = int(exec_config['hoflen'])
    # End load config file

    # Start-up print
    print('Bm25GP [%s] - %s [%s]' % (fexecname, pwd.getpwuid(os.getuid())[4], os.getlogin()),
          '\nRunning on Python', sys.version)
    logger(resultdir, 'Bm25GP [%s] - %s [%s]' % (fexecname, pwd.getpwuid(os.getuid())[4], os.getlogin()),
           '\nRunning on Python', sys.version)

    # Set pset and toolbox
    pset = create_primitiveset()
    toolbox = create_toolbox(pset)
    set_gpoperator(toolbox, pset)
    #set_evaluate(toolbox, evalQuery, queries, index, aolstats, results)

    # Results
    print('Reading results from "' + ffiltersresults + '"...')
    logger(resultdir, 'Reading results from "' + ffiltersresults + '"...')
    filtersresults_data = load_results(ffiltersresults)['results']
    print('Processing results...')
    logger(resultdir, 'Processing results...')
    results = create_result_dict(filtersresults_data)
    print('Results ready!')
    logger(resultdir, 'Results ready!')

    # Get train data
    print('Reading train queries from "' + ftrainame + '"...')
    logger(resultdir, 'Reading train queries from "' + ftrainame + '"...')
    queries = lepref_util.carregar_queries(ftrainame)[0]
    #lepref_util.configurar_idcg_medio(queries, topN = MAXEVAL)
    print(len(queries), 'queries read!')
    logger(resultdir, len(queries), 'queries read!')

    # Create Bm25 index
    print('Creating index from queries...')
    logger(resultdir, 'Creating index from queries...')
    index = bm25.Bm25Index()
    index.generate_from_queries(queries)
    print('Index created with', len(index), 'terms!')
    logger(resultdir, 'Index created with', len(index), 'terms!')

    # Set evaluate function and data
    print('Reading AOL output file from "' + foutputname + '"...')
    logger(resultdir, 'Reading AOL output file from "' + foutputname + '"...')
    aoldata, featurelist = aol_parser.output_read(foutputname)
    print('Precomputing AOL statistics...')
    logger(resultdir, 'Precomputing AOL statistics...')
    aolstats = process_aol_stats(aoldata)
    print(len(aolstats['stats']), 'AOL statistics created!')
    logger(resultdir, len(aolstats['stats']), 'AOL statistics created!')
    set_evaluate(toolbox, evalQuery, queries, index, aolstats, results)
    print('All data is ready!')
    logger(resultdir, 'All data is ready!')

    if os.path.exists(resultdir + os.sep + 'checkpoint.data'):
        # Load checkpoint
        print('Checkpoint found! Loading...')
        logger(resultdir, 'Checkpoint found! Loading...')
        pop, igen, stats, hof, logbook, randomstate = load_checkpoint(resultdir)
        random.setstate(randomstate)
    else:
        igen = 0
        logbook = None
        pop, hof, stats = prepare_gp(toolbox, randomseed, popsize, hoflen)

    starttime = datetime.now()
    if igen == 0:
        print('Starting new evolution at ', starttime, '!', sep='')
        logger(resultdir, 'Starting new evolution at ', starttime, '!', sep='')
    else:
        print('Resuming evolution in generation ', igen, ' at ', starttime, '!', sep='')
        logger(resultdir, 'Resuming evolution in generation ', igen, ' at ', starttime, '!', sep='')

    # eaCheckpoint(pop, cxprob, MutProb, igen, generations, stats, halloffame=hof,
    #              logbook=logbook, cpfile_location = dirresult)
    eaCheckpoint(pop, toolbox, cxprob, mutprob, igen, generations, resultdir, stats,
                 halloffame=hof, logbook=logbook)
    endtime = datetime.now()
    print('Evolution training finishes at ', endtime, '!', sep='')
    print('Total time: ', endtime - starttime)
    logger(resultdir, 'Evolution training finishes at ', endtime, '!', sep='')
    logger(resultdir, 'Total time: ', endtime - starttime)

    # Validation
    # Get validation data
    print('Preparing for validation and test!')
    logger(resultdir, 'Preparing for validation and test!')
    queries = lepref_util.carregar_queries(fvaliname)[0]
    # Set validation data to toolbox.evaluate
    set_evaluate(toolbox, evalQuery, queries, index, aolstats, results)
    # Run validation
    global tobedone
    tobedone = len(hof)
    valiresults = [evaluation[0] for evaluation in map(toolbox.evaluate, hof)]
    # Output validation
    print(':validation:')
    logger(resultdir, ':validation:')
    for vali in valiresults:
        print(vali)
        logger(resultdir, vali)
    bestvali = max(zip(hof, valiresults), key=lambda z: z[1])
    print(':bestvalidation:')
    print(bestvali[1])
    logger(resultdir, ':bestvalidation:')
    logger(resultdir, bestvali[1])

    # Testing
    # Get test data
    queries = lepref_util.carregar_queries(ftestname)[0]
    # Set test data to toolbox.evaluate
    set_evaluate(toolbox, evalQuery, queries, index, aolstats, results)
    # Run test
    tobedone = 1
    resulttest = toolbox.evaluate(bestvali[0])
    # Output test
    print(':test:')
    print(resulttest[0])
    logger(resultdir, ':test:')
    logger(resultdir, resulttest[0])
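
# ---------------------------------------------------------------------------
# The helpers called above (load_config_file, logger, load_checkpoint) are
# defined elsewhere in the project. A minimal sketch of what they could look
# like, assuming the JSON config format shown in the usage message, a plain
# 'run.log' appended inside resultdir, and a pickled dict for the DEAP-style
# checkpoint. The file name 'run.log', the checkpoint dict keys, and the
# implementation details are assumptions for illustration, not the project's
# actual code.
import json
import os
import pickle

def load_config_file(fexecname):
    """Parse the JSON execution configuration file into a dict."""
    with open(fexecname, 'r') as fexec:
        return json.load(fexec)

def logger(resultdir, *args, sep=' '):
    """Append a print-style message to a run log inside resultdir."""
    with open(resultdir + os.sep + 'run.log', 'a') as flog:
        flog.write(sep.join(str(arg) for arg in args) + '\n')

def load_checkpoint(resultdir):
    """Restore the evolution state saved alongside eaCheckpoint."""
    with open(resultdir + os.sep + 'checkpoint.data', 'rb') as fcp:
        cp = pickle.load(fcp)  # assumed layout: one dict holding the state
    return (cp['pop'], cp['igen'], cp['stats'], cp['hof'],
            cp['logbook'], cp['randomstate'])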
def main():
    if len(sys.argv) != 5:
        print('Python 2015')
        print('Usage: python3 bm25.py argv[1] argv[2] argv[3] argv[4]')
        print('argv[1]: database file')
        print('argv[2]: name of the file where the data will be saved')
        print('argv[3]: name of the directory where the result files will be saved')
        print('argv[4]: name of the range file')
        # print('argv[x]:')
        sys.exit(-1)
    else:
        dbname = sys.argv[1]
        fdataname = sys.argv[2]
        dirresultname = sys.argv[3]
        rangefname = sys.argv[4]

    with open(rangefname, 'r') as rangefile:
        rangestring = rangefile.read()
    rangedict = json.loads(rangestring)
    qiini = rangedict['qiini']
    fiini = rangedict['fiini']
    qiend = rangedict['qiend']
    fiend = rangedict['fiend']

    global EXECSTHRESHOLD
    try:
        EXECSTHRESHOLD = rangedict['bkplen']
    except KeyError:
        print('Using bkplen default =', EXECSTHRESHOLD)
    global NOSTOPWORDS
    try:
        NOSTOPWORDS = rangedict['nostopwords']
    except KeyError:
        print('Using nostopwords default =', NOSTOPWORDS)

    # Filters
    global filters
    filters = {
        1: {'name': 'default(or)', 'query': True, 'posproc': False},
        2: {'name': 'and', 'query': True, 'posproc': True},
        3: {'name': 'filter', 'query': False, 'posproc': True}
    }

    if os.path.exists(fdataname):
        print('Loading data from file ', fdataname, '...', sep='')
        queries, index = load_data(fdataname)
    else:
        # Get training data
        print('Reading queries...')
        queries, lista_invertida = lepref_util.carregar_queries(dbname)
        print('Queries read:', len(queries))
        index = Bm25Index()
        print('Adding queries to the index')
        index.generate_from_queries(queries)
        save_data(queries, index, fdataname)

    lepref_util.configurar_idcg_medio(queries, topN = TOPN)

    ### Evaluations
    ## Simple
    # print('MeanNDCG: ', evaluate(queries, index, topN = TOPN))

    ## Filters permutations
    doit = False
    if os.path.exists(dirresultname):
        print('Result directory ', dirresultname, ' found!', sep='')
        while True:
            inputstring = input('Compute again? (N,y): ')
            if inputstring in ['Y', 'y']:
                doit = True
                break
            elif inputstring in ['N', 'n', '']:
                print('Exiting...')
                sys.exit()
            else:
                print('Invalid option <', inputstring, '>!', sep='')
    else:
        doit = True

    # print_idf(index)
    if doit:
        #print(count_execs(queries, qiini, fiini, qiend, fiend))
        #results = evaluate_filters(queries, index, topN = TOPN, qiini = 1, fiini = 0, qiend = 2, fiend = 8)
        evaluate_filters(queries, index, dirresultname, topN = TOPN,
                         qiini = qiini, fiini = fiini, qiend = qiend, fiend = fiend)
        #evaluate_filters(queries, index, dirresultname, topN = TOPN, qiini = 0, fiini = 0, qiend = 3, fiend = None)
        #save_results(results, dirresultname)

    print('Docs read:', len(index.doc))
    print('AVGDL:', index.avgdl)
    print('Vocabulary Length:', len(index))
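
# ---------------------------------------------------------------------------
# load_data and save_data are likewise defined elsewhere. A minimal sketch,
# assuming the (queries, index) pair is simply round-tripped with pickle;
# the serialization format is an assumption, and any format that restores
# both objects would satisfy the calls in main() above.
import pickle

def save_data(queries, index, fdataname):
    """Persist the parsed queries and the Bm25Index to fdataname."""
    with open(fdataname, 'wb') as fdata:
        pickle.dump((queries, index), fdata)

def load_data(fdataname):
    """Restore the (queries, index) pair written by save_data."""
    with open(fdataname, 'rb') as fdata:
        return pickle.load(fdata)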