def evaluate_function(query, index, filterproduct, termslist, topN = TOPN):
    """Evaluate a single query under one filter combination.

    Runs ``index.filters_query`` with the joined terms and the given
    ``filterproduct``, scores the resulting ranking with NDCG@topN
    against the query's ideal DCG, and reduces it to a mean NDCG.

    Returns a ``(filterproduct, mean_ndcg)`` tuple so the caller can map
    the score back to the filter combination that produced it.
    """
    query_text = ' '.join(termslist)
    ranking = index.filters_query(query_text, filterproduct)
    gain_vector = obter_vetor_ganho(ranking, query)
    per_position_ndcg = lepref_util.ndcg_unico(gain_vector, query.idcg, topN, chave = lambda e: e)
    return (filterproduct, lepref_util.ndcg_medio_unico(per_position_ndcg))
def evaluate(queries, index, topN = TOPN):
    """Compute the mean NDCG@topN over *queries* using plain (unfiltered) search.

    For each query, its term words are joined into one query string and run
    through ``index.simple_query``; the ranking is scored with
    ``lepref_util.ndcg_unico`` against the query's ideal DCG and reduced to a
    per-query mean NDCG, which is also cached on ``query.mean_ndcg_atual``
    as a side effect.

    Parameters
    ----------
    queries : sequence of query objects exposing ``.term`` (items with a
        ``.word`` attribute) and ``.idcg``; each gets ``.mean_ndcg_atual`` set.
    index : search index exposing ``simple_query``.
    topN : NDCG cutoff, defaults to the module-level ``TOPN``.

    Returns
    -------
    float
        Mean of the per-query mean NDCG values.

    Raises
    ------
    ZeroDivisionError
        If *queries* is empty.
    """
    # Fix: removed the debug print of queries whose idcg length != 40,
    # which was explicitly flagged "#TODO remover", and the dead
    # commented-out variants of each statement.
    acumulador_ndcg = 0.0
    for query in queries:
        # Run the plain (no-filter) query over the joined term words.
        rank = index.simple_query(' '.join(term.word for term in query.term))
        vetor_ganho_consulta = obter_vetor_ganho(rank, query)
        ndcg_q = lepref_util.ndcg_unico(vetor_ganho_consulta, query.idcg, topN, chave = lambda e: e)
        # Cache the per-query mean on the query object for later reporting.
        query.mean_ndcg_atual = lepref_util.ndcg_medio_unico(ndcg_q)
        acumulador_ndcg += query.mean_ndcg_atual
    return acumulador_ndcg / len(queries)
def evaluate_filters(queries, index, dirresultname, topN = TOPN, qiini = None, fiini = None, qiend = None, fiend = None):
    """Exhaustively evaluate every filter combination for every query, checkpointing results.

    For each query in the (optional) inclusive window [qiini, qiend], iterates
    the cartesian product of the global ``filters`` keys over the query's
    terms (``len(filters) ** n_terms`` combinations), runs a filtered query on
    ``index``, computes the mean NDCG@topN, and periodically saves the
    accumulated results through ``save_results`` every ``EXECSTHRESHOLD``
    executions. A progress line is refreshed in place roughly once per second.

    Parameters
    ----------
    queries : sequence of query objects (expose ``.term``, ``.idcg``, ``.queryid``)
    index : search index exposing ``filters_query``
    dirresultname : destination passed straight through to ``save_results``
    topN : NDCG cutoff (module-level ``TOPN`` by default)
    qiini, qiend : optional inclusive query-index window to process
    fiini : start filter index — applied to the FIRST query of the window only
        (resume support)
    fiend : end filter index — applied to the LAST query of the window only

    Side effects: mutates ``query.mean_ndcg_atual`` on each query, prints
    progress to stdout, writes checkpoint files via ``save_results``, and may
    call ``sys.exit()`` on invalid fiini/fiend values.
    """
    start_time = time.time()
    mini_time = start_time  # anchor for the short-window throughput shown in the tracker
    print(time.strftime("Start at: %a, %d %b %Y %H:%M:%S", time.localtime()))
    results = []
    countdict, totalqueries = count_execs(queries, qiini, fiini, qiend, fiend)
    queriesdone = 0      # total (query, filter) executions done so far, despite the name
    miniqueriesdone = 0  # executions since the last progress refresh
    seq = 1              # sequence number of the next checkpoint file
    try:
        queries = queries[qiini:qiend+1] # this exception handler is needed to validate the sum in queries[qiini:qiend+1]
    except TypeError as error:
        if type(qiend) == type(None):
            # qiend is None, so qiend+1 raised TypeError: slice open-ended instead.
            queries = queries[qiini:]
        else:
            raise(error)
    # Set up posqiini (absolute query position) for saving the report later on.
    posqiini = 0
    if type(qiini) == int:
        posqiini = qiini
    elif qiini == None:
        qiini = 0
    posfiini = 0
    if type(fiini) == int:
        posfiini = fiini
    if fiini == None:
        fiini = 0
    for qi, query in enumerate(queries):
        if NOSTOPWORDS:
            termslist = remove_stopwords(query)
        else:
            termslist = [term.word for term in query.term]
        # One "job" per assignment of a filter to every term of the query.
        n_jobs = len(filters) ** len(termslist)
        productsresults = []
        filteriterator = itertools.product(filters.keys(), repeat = len(termslist))
        # The first query of the window may resume from filter index fiini.
        if qi == 0 and fiini:
            if fiini < 0:
                print('fiini não pode ser negativo')
                print('fiini:', fiini)
                sys.exit()
            if fiini >= n_jobs:
                print('O número inicial do filtro não pode ser maior ou igual a quantidade de filtros ')
                print('fiini:', fiini)
                print('Nfiltros:', n_jobs)
                sys.exit()
            filteriterator = itertools.islice(filteriterator, fiini, n_jobs)
        # The last query of the window may stop early at filter index fiend.
        if qi == len(queries)-1 and fiend != None:
            if fiend < 0:
                print('fiend não pode ser negativo')
                print('fiend:', fiend)
                sys.exit()
            if fiend >= n_jobs:
                print('O índice final de filtros não pode ser maior ou igual a quantidade de filtros ')
                print('fiend:', fiend)
                # NOTE(review): prints 3**len(termslist) while n_jobs is
                # len(filters)**len(termslist) — confirm filters has exactly
                # 3 entries, otherwise this message is wrong.
                print('Nfiltros:', 3**len(termslist))
                sys.exit()
            if qi == 0:
                # Single-query window: the iterator already skipped fiini items,
                # so only fiend+1-fiini remain to be taken.
                filteriterator = itertools.islice(filteriterator, fiend + 1 - fiini)
            else:
                filteriterator = itertools.islice(filteriterator, fiend + 1)
        for fi, filterproduct in enumerate(filteriterator):
            # Evaluate the query under this filter combination.
            rank = index.filters_query(' '.join(termslist), filterproduct)
            vetor_ganho_consulta = obter_vetor_ganho(rank, query)
            ndcg_q = lepref_util.ndcg_unico(vetor_ganho_consulta, query.idcg, topN, chave = lambda e: e)
            query.mean_ndcg_atual = lepref_util.ndcg_medio_unico(ndcg_q)
            queriesdone += 1
            miniqueriesdone += 1
            # Progress tracker, e.g.:
            # (56%) 1541 of 5125 queries. Query 650 processing (34%) 14 of 59 filters...
            # Throttled to roughly one refresh per second.
            if time.time() - mini_time > 1:
                print('\rExecs: %d/%d (%2.2f%%) Query: %d/%d (%2.2f%%) Processing %d/%d (%2.2f%%) %.1fq/s (%.1fq/s)' % (
                    queriesdone, totalqueries, (queriesdone)/totalqueries * 100,
                    (qi+1), len(queries), (qi+1)/len(queries) * 100,
                    fi+1, n_jobs, (fi+1)/n_jobs * 100,
                    (queriesdone) / (time.time() - start_time),
                    (miniqueriesdone) / (time.time() - mini_time)), end = '')
                # Reset the short-window rate.
                mini_time = time.time()
                miniqueriesdone = 0
            # Record this filter combination's score for the current query.
            productsresults.append((filterproduct, query.mean_ndcg_atual))
            # Checkpoint to disk every EXECSTHRESHOLD executions.
            if queriesdone%EXECSTHRESHOLD == 0:
                queryresult = (query.queryid, termslist, productsresults)
                results.append(queryresult)
                posqiend = qi + qiini
                if qi == 0:
                    posfiend = fiini + fi  # first query: offset by the resume position
                else:
                    posfiend = fi
                # Save results
                save_results(results, dirresultname, posqiini, posfiini, posqiend, posfiend, seq)
                # Set up the next round of checkpointing.
                seq += 1
                if fi+1 == n_jobs:
                    posqiini = qiini + qi + 1 # next query
                    posfiini = 0
                else:
                    posqiini = qiini + qi # same query, next filter
                    if qi == 0:
                        # add the start of the interval to the position
                        # NOTE(review): fiini was normalized to 0 earlier when
                        # None, so the fiini == None branch looks unreachable —
                        # confirm before relying on it.
                        if fiini == None:
                            posfiini = fi + 1
                        else:
                            posfiini = fiini + fi + 1
                    else:
                        posfiini = fi + 1
                # Drop the flushed batches and start fresh accumulators.
                del(productsresults)
                del(results)
                productsresults = []
                results = []
        if productsresults:
            # Add this query's leftover (un-checkpointed) results.
            queryresult = (query.queryid, termslist, productsresults)
            results.append(queryresult)
    if results:
        # Final flush of whatever did not hit the EXECSTHRESHOLD boundary.
        posqiend = qi + qiini
        posfiend = fi + fiini
        save_results(results, dirresultname, posqiini, posfiini, posqiend, posfiend, seq)
    # Final progress line (no short-window rate).
    print('\rExecs: %d/%d (%2.2f%%) Query: %d/%d (%2.2f%%) Processing %d/%d (%2.2f%%) %.1fq/s' % (
        queriesdone, totalqueries, (queriesdone)/totalqueries * 100,
        (qi+1), len(queries), (qi+1)/len(queries) * 100,
        fi+1, n_jobs, (fi+1)/n_jobs * 100,
        (queriesdone) / (time.time() - start_time)) )
    print(time.strftime("Ends at: %a, %d %b %Y %H:%M:%S", time.localtime()))
    print("--- %s seconds ---" % (time.time() - start_time))