示例#1
0
    def POST(self):
        """Handle a search POST request and return the results as JSON.

        Expects form fields:
            query_string -- the search text
            type         -- query type, passed through to do_query()
            map          -- "y" to also compute map visualisation data

        Returns a JSON string with a "result" list of matched documents
        and, when map == "y", an additional "map_data" entry.
        """
        ix = web.ix
        web.header('Content-Type', 'application/json')
        post_input = web.input(_method='post')

        query_string = post_input["query_string"].strip()
        # Renamed from `type` / `map` to avoid shadowing the builtins.
        query_type = post_input["type"].strip()
        map_flag = post_input["map"].strip()
        matched_docs = do_query(ix, query_string, query_type)

        if map_flag == "y":
            # NOTE(review): `language` is not defined in this method —
            # presumably a module-level global; verify it exists.
            map_data = compute_map_data(query_string, matched_docs, language)
            return json.dumps({
                "result": matched_docs,
                "map_data": map_data
            },
                              indent=4,
                              sort_keys=True)
        else:
            return json.dumps({"result": matched_docs},
                              indent=4,
                              sort_keys=True)
示例#2
0
def adder_page():
    """Render the search form; on POST, run the query and redirect.

    GET  -> returns the HTML search form.
    POST -> parses the raw request body, strips the form framing
            ("Query=" prefix and trailing bytes), runs do_query() and
            redirects to the result page.
    """
    errors = ""
    result = None
    if request.method == "POST":
        question = None
        try:
            raw = request.get_data()
            # Strip the framing bytes around the submitted value, then
            # undo the form's "+"-for-space encoding.
            question = str(raw[6:-3]).replace("+", " ")
        # Narrowed from a bare `except:` which swallowed everything,
        # including KeyboardInterrupt.
        except (TypeError, IndexError, AttributeError) as exc:
            # Original left the "{!r}" placeholder unfilled, so the page
            # literally showed "{!r}". Fill it with the offending value.
            errors += "<p>{!r} is not a query_string.</p>\n".format(exc)
        if question is not None:
            result = do_query(question[2:-1])
            return redirect(url_for('result_page', result=result))

    return '''
        <html>
            <body>
                {errors}
                <p>Enter your query:</p>
                <form method="post" action="""{{ url_for('result_page', result={res}) }}""">
                    <p><input name="Query" /></p>
                    <p><input type="submit" value="Search" /></p>
                </form>
            </body>
        </html>
    '''.format(errors=errors, res=result)
示例#3
0
# -*- coding: utf-8 -*-
import io
import sys
from spider import get_sale_games
from csv2sqlite import do_convert
from query import do_query
import requests

BOT_HOOK = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=yourkey"

if __name__ == '__main__':
    # Reconfigure stdout so UTF-8 characters print correctly (e.g. on
    # Windows consoles with a legacy default encoding).
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    get_sale_games(5)
    do_convert()

    # Post each report type to the webhook. The original repeated this
    # block three times and (intentionally, it seems) only logged the
    # final response; the loop preserves that behaviour.
    r = None
    for report in ('price', 'pop', 'pos'):
        r = requests.post(BOT_HOOK, json=do_query(report))

    print(r.text + "\r\n")
    print(r.status_code, r.reason)
示例#4
0
# Evaluate retrieval quality over all testbeds: rank documents for each
# query, report the top hits, then re-rank with blind relevance feedback.
for testbed_no in range(1, 17):
    if testbed_no == 10:
        # Testbed 10 is missing relevance information and this breaks
        # our code. So we chose to ignore it.
        continue
    collection = os.path.join('testbeds', 'testbed%d' % testbed_no)
    print('Results for Testbed %d' % testbed_no)
    # Accumulators for mean average precision across the five queries.
    init_map = 0
    final_map = 0
    for query_num in ['1', '2', '3', '4', '5']:
        with open(os.path.join(collection, 'query.%s' % query_num)) as fin:
            # Query files are single-line, space-separated word lists.
            query_words = fin.read().strip().lower().split(' ')
            print('Query {0} with query words {1}'.format(query_num, query_words))

            # accum maps document id -> retrieval score.
            accum, titles, top_words = do_query(collection, query_words)
            results = sorted(accum, key=accum.__getitem__, reverse=True)
            # Keep only the configured number of top-ranked documents.
            num_results = min(len(results), parameters.num_results)
            results = results[:num_results]

            # print top results
            print('Initial Results:')
            for i in range(num_results):
                print("{0:10.8f} {1:5} {2}".format(accum[results[i]], results[i],
                                                   titles[results[i]]))

            init_map += analyze_results(results, collection, query_num, 'initial')

            # Re-rank using blind (pseudo) relevance feedback, then
            # truncate to the same result count for a fair comparison.
            # NOTE(review): this loop body appears truncated here —
            # final_map is accumulated somewhere past this excerpt.
            results, accum, titles = blind_relevance_feedback(query_words, results, titles, top_words, collection)
            results = results[:num_results]
示例#5
0
    def POST(self):
        """Topic-model search API endpoint.

        Dispatches on the "function" POST field:
          * "get_topic": return each topic's top words and weights as JSON.
          * "search":    run a keyword search, then boost each hit's score
            by its TF-IDF similarity to the selected topic vector and
            return the re-ranked documents as JSON.

        NOTE(review): this is Python 2 code (`print` statements below).
        """

        def convert_docs_into_vector(train_set, language):
            # TF-IDF vectorise the documents. Returns the document-term
            # matrix and the vectoriser's feature (term) names.
            # NOTE(review): get_feature_names() is removed in newer
            # scikit-learn (get_feature_names_out) — confirm pinned version.
            stopWords = getStopWords(language)
            vectorizer = TfidfVectorizer(stop_words=stopWords,
                                         use_idf=True,
                                         sublinear_tf=False,
                                         norm=None,
                                         smooth_idf=True)

            trainVectorizerArray = vectorizer.fit_transform(train_set)

            return trainVectorizerArray, vectorizer.get_feature_names()

        def getTopics(topic_pickle, n_top_words=500):
            # Build {topic index: [[word, weight], ...]} holding the
            # n_top_words highest-weighted terms of each topic.
            [model, topic_vectors, vocab] = topic_pickle
            data = {}
            for i in range(len(topic_vectors)):

                topic_dist = topic_vectors[i]
                # Vocabulary sorted by descending weight for this topic.
                topic_words = np.array(vocab)[np.argsort(
                    topic_dist)][:-(n_top_words + 1):-1]
                a = zip(
                    topic_words,
                    topic_dist[np.argsort(topic_dist)][:-(n_top_words + 1):-1])
                b = [[t[0], t[1]] for t in a]
                data[i] = b

            return data

        '''
            Begin of API definition
        '''

        ix = web.ix
        topic_pickle = web.topic_pickle

        web.header('Content-Type', 'application/json')
        post_input = web.input(_method='post')
        print "function: ", post_input["function"]
        if post_input["function"] == "get_topic":
            n = int(post_input["n"])
            topic_data = getTopics(topic_pickle, n)
            return json.dumps(topic_data, indent=4, sort_keys=True)

        if post_input["function"] == "search":
            [_, topic_vectors, topic_terms] = topic_pickle
            topic_index = post_input["topic_index"]
            query_string = post_input["query_string"].strip()
            # NOTE(review): `map` and `type` shadow builtins here.
            map = post_input["map"].strip()
            type = post_input["type"].strip()
            matched_docs = do_query(ix, query_string, type)

            # get all matched docs
            docs = [matched_docs[i]["text"] for i in range(len(matched_docs))]

            # convert this docs into vectors
            # NOTE(review): `language` is not defined in this method —
            # presumably a module-level global; verify it exists.
            doc_vectors, doc_terms = convert_docs_into_vector(
                docs, language)  # list of vectors and terms of return docs

            # Selected topic's weight vector, then a term -> weight lookup.
            topic_vec1 = np.array(topic_vectors[int(
                topic_index)])  # topic vec from topic vectors
            topic_dic = {
                topic_terms[i]: topic_vec1[i]
                for i in range(len(topic_terms))
            }
            topic_vec2 = np.array([])

            # Re-express the topic vector in the document vocabulary
            # (weight 0 for terms absent from the topic).
            for i in range(len(doc_terms)):
                v = 0
                if doc_terms[i] in topic_dic:
                    v = topic_dic[doc_terms[i]]
                topic_vec2 = np.append(topic_vec2, v)

            scores2 = np.dot(doc_vectors.toarray(), np.transpose(
                topic_vec2))  # relevant between topic and documents
            scores1 = [
                matched_docs[i]["score"] for i in range(len(matched_docs))
            ]  #orginal scores
            scores3 = [scores1[i] + scores2[i] for i in range(len(scores1))
                       ]  # combination of two scores
            for i in range(len(matched_docs)):
                matched_docs[i]["score"] = scores3[i]

            # Debug output: scores before and after re-sorting.
            print[matched_docs[i]["score"] for i in range(len(matched_docs))]
            matched_docs = sorted(matched_docs,
                                  key=lambda k: k['score'],
                                  reverse=True)
            print[matched_docs[i]["score"] for i in range(len(matched_docs))]

            if map == "y":
                map_data = compute_map_data(query_string, matched_docs,
                                            language)

                return json.dumps(
                    {
                        "result": matched_docs,
                        "map_data": map_data
                    },
                    indent=4,
                    sort_keys=True)

            else:
                return json.dumps({"result": matched_docs},
                                  indent=4,
                                  sort_keys=True)
示例#6
0
from query import do_query

# Smoke-test the query module with a sample question.
answer = do_query("Where is Pune")
print(answer)