def POST(self):
    ix = web.ix
    web.header('Content-Type', 'application/json')
    post_input = web.input(_method='post')
    query_string = post_input["query_string"].strip()
    # Local names avoid shadowing the builtins type() and map().
    query_type = post_input["type"].strip()
    want_map = post_input["map"].strip()
    matched_docs = do_query(ix, query_string, query_type)
    # Optionally, each document's text can be truncated to its first five
    # lines before returning.
    if want_map == "y":
        # "language" is assumed to be defined at module level.
        map_data = compute_map_data(query_string, matched_docs, language)
        return json.dumps({"result": matched_docs, "map_data": map_data},
                          indent=4, sort_keys=True)
    else:
        return json.dumps({"result": matched_docs},
                          indent=4, sort_keys=True)
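# A minimal client sketch for the handler above, assuming the app runs on
# web.py's default port and the handler is mounted at /search. The URL, the
# port, and the "OR" query type are assumptions, not taken from the snippet;
# the "score" and "text" fields appear on matched docs elsewhere in this code.
import requests

resp = requests.post("http://localhost:8080/search",      # hypothetical URL
                     data={"query_string": "where is pune",
                           "type": "OR",                   # assumed query type
                           "map": "y"})                    # "y" also returns map_data
payload = resp.json()
for doc in payload["result"]:
    print(doc["score"], doc["text"].split("\n")[0])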
from flask import request, redirect, url_for

def adder_page():
    errors = ""
    if request.method == "POST":
        # Read the submitted query from the parsed form data instead of
        # slicing the raw request body; Flask handles the URL decoding
        # (including "+" for spaces) automatically.
        question = request.form.get("Query", "").strip()
        if not question:
            errors += "<p>The submitted form did not contain a query string.</p>\n"
        else:
            result = do_query(question)
            return redirect(url_for('result_page', result=result))
    # On GET, or after a failed POST, render a simple search form that
    # posts back to this view.
    return '''
        <html>
            <body>
                {errors}
                <p>Enter your query:</p>
                <form method="post">
                    <p><input name="Query" /></p>
                    <p><input type="submit" value="Search" /></p>
                </form>
            </body>
        </html>
    '''.format(errors=errors)
# -*- coding: utf-8 -*-
import io
import sys

import requests

from spider import get_sale_games
from csv2sqlite import do_convert
from query import do_query

# WeChat Work (WeCom) group-bot webhook; replace "yourkey" with the bot key.
BOT_HOOK = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=yourkey"

if __name__ == '__main__':
    # Re-wrap stdout so UTF-8 characters print correctly on consoles whose
    # default encoding is not UTF-8.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    get_sale_games(5)
    do_convert()

    # Post one message per report type and log the outcome of each request,
    # not just the last one.
    for report in ('price', 'pop', 'pos'):
        r = requests.post(BOT_HOOK, json=do_query(report))
        print(r.status_code, r.reason)
        print(r.text + "\r\n")
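# The script above implies that do_query() returns a dict the WeCom group-bot
# webhook accepts as-is. The webhook's simplest documented payload is a text
# message; a sketch of that shape (the content string is made up here):
example_payload = {
    "msgtype": "text",
    "text": {"content": "Top discounted games today: ..."},
}
# requests.post(BOT_HOOK, json=example_payload) would post it to the group.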
import os

# do_query, analyze_results, blind_relevance_feedback and parameters come
# from the surrounding project.
for testbed_no in range(1, 17):
    if testbed_no == 10:
        # Testbed 10 is missing relevance information, which breaks our
        # code, so we skip it.
        continue
    collection = os.path.join('testbeds', 'testbed%d' % testbed_no)
    print('Results for Testbed %d' % testbed_no)
    init_map = 0
    final_map = 0
    for query_num in ['1', '2', '3', '4', '5']:
        with open(os.path.join(collection, 'query.%s' % query_num)) as fin:
            query_words = fin.read().strip().lower().split(' ')
        print('Query {0} with query words {1}'.format(query_num, query_words))
        accum, titles, top_words = do_query(collection, query_words)
        results = sorted(accum, key=accum.__getitem__, reverse=True)
        num_results = min(len(results), parameters.num_results)
        results = results[:num_results]
        # Print the top-ranked documents before relevance feedback.
        print('Initial Results:')
        for i in range(num_results):
            print("{0:10.8f} {1:5} {2}".format(accum[results[i]],
                                               results[i],
                                               titles[results[i]]))
        init_map += analyze_results(results, collection, query_num, 'initial')
        results, accum, titles = blind_relevance_feedback(
            query_words, results, titles, top_words, collection)
        results = results[:num_results]
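# The loop accumulates per-query values from analyze_results() into init_map
# and final_map, which suggests a mean-average-precision computation. A
# minimal sketch of average precision, assuming analyze_results() reads the
# testbed's relevance judgements and returns a value like this (the function
# below is illustrative, not the project's actual implementation):
def average_precision(ranked_ids, relevant_ids):
    """AP = mean of precision@k over every rank k that holds a relevant doc."""
    hits = 0
    precision_sum = 0.0
    for k, doc_id in enumerate(ranked_ids, start=1):
        if doc_id in relevant_ids:
            hits += 1
            precision_sum += hits / k
    return precision_sum / max(len(relevant_ids), 1)

# e.g. average_precision(['d3', 'd1', 'd7'], {'d1', 'd7'}) == (1/2 + 2/3) / 2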
def POST(self):
    def convert_docs_into_vector(train_set, language):
        stop_words = getStopWords(language)
        vectorizer = TfidfVectorizer(stop_words=stop_words, use_idf=True,
                                     sublinear_tf=False, norm=None,
                                     smooth_idf=True)
        train_vectors = vectorizer.fit_transform(train_set)
        return train_vectors, vectorizer.get_feature_names()

    def getTopics(topic_pickle, n_top_words=500):
        [model, topic_vectors, vocab] = topic_pickle
        data = {}
        for i in range(len(topic_vectors)):
            topic_dist = topic_vectors[i]
            # Indices of the top n_top_words terms, strongest first.
            order = np.argsort(topic_dist)[:-(n_top_words + 1):-1]
            topic_words = np.array(vocab)[order]
            weights = topic_dist[order]
            # Cast to plain float so the result is JSON-serializable.
            data[i] = [[w, float(s)] for w, s in zip(topic_words, weights)]
        return data

    # Beginning of the API definition.
    ix = web.ix
    topic_pickle = web.topic_pickle
    web.header('Content-Type', 'application/json')
    post_input = web.input(_method='post')
    print("function:", post_input["function"])

    if post_input["function"] == "get_topic":
        n = int(post_input["n"])
        topic_data = getTopics(topic_pickle, n)
        return json.dumps(topic_data, indent=4, sort_keys=True)

    if post_input["function"] == "search":
        [_, topic_vectors, topic_terms] = topic_pickle
        topic_index = post_input["topic_index"]
        query_string = post_input["query_string"].strip()
        # Local names avoid shadowing the builtins map() and type().
        want_map = post_input["map"].strip()
        query_type = post_input["type"].strip()
        matched_docs = do_query(ix, query_string, query_type)

        # Vectorize the text of every matched document; "language" is
        # assumed to be defined at module level.
        docs = [doc["text"] for doc in matched_docs]
        doc_vectors, doc_terms = convert_docs_into_vector(docs, language)

        # Build the selected topic's weight vector over the documents'
        # vocabulary, with zero weight for terms the topic does not cover.
        topic_vec1 = np.array(topic_vectors[int(topic_index)])
        topic_dic = {topic_terms[i]: topic_vec1[i]
                     for i in range(len(topic_terms))}
        topic_vec2 = np.array([topic_dic.get(term, 0) for term in doc_terms])

        # Combine each document's original retrieval score with its
        # topic-relevance score, then re-rank.
        scores2 = np.dot(doc_vectors.toarray(), topic_vec2)
        scores1 = [doc["score"] for doc in matched_docs]
        scores3 = [scores1[i] + scores2[i] for i in range(len(scores1))]
        for i in range(len(matched_docs)):
            matched_docs[i]["score"] = scores3[i]
        matched_docs = sorted(matched_docs, key=lambda k: k['score'],
                              reverse=True)

        # Optionally, each document's text can be truncated to its first
        # five lines before returning.
        if want_map == "y":
            map_data = compute_map_data(query_string, matched_docs, language)
            return json.dumps({"result": matched_docs, "map_data": map_data},
                              indent=4, sort_keys=True)
        else:
            return json.dumps({"result": matched_docs},
                              indent=4, sort_keys=True)
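# A tiny worked example of the score-combination step above, with made-up
# numbers: two documents over a three-term vocabulary, re-scored against one
# topic. All values here are illustrative, not from a real index.
import numpy as np

doc_vectors = np.array([[0.2, 0.0, 0.5],   # tf-idf rows for two documents
                        [0.0, 0.7, 0.1]])
topic_vec2 = np.array([1.0, 0.0, 2.0])     # topic weights over the same terms
scores1 = [2.0, 2.5]                       # original retrieval scores
scores2 = np.dot(doc_vectors, topic_vec2)  # topic relevance: [1.2, 0.2]
scores3 = [s1 + s2 for s1, s2 in zip(scores1, scores2)]
print(scores3)                             # [3.2, 2.7]: doc 1 overtakes doc 2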
from query import do_query

print(do_query("Where is Pune"))