Exemplo n.º 1
0
 def search(self, query):
     """Run sub-queries for ``query``, merge the hits and re-rank them.

     For every string yielded by ``return_search_results(query.rawQuery)``
     a SENTENCES query (k=500) is sent to ``self.other``.  The returned
     items are de-duplicated by sentence UUID, scored with the pickled
     model in ``./trained_model.p`` and returned sorted by descending
     score.

     Args:
         query: the incoming SearchQuery; only ``rawQuery`` is read here,
             and the object itself is echoed back in the result.

     Returns:
         A SearchResult whose ``searchResultItems`` are ordered from the
         highest to the lowest ``score``.
     """
     augf = AnalyticUUIDGeneratorFactory()
     aug = augf.create()

     # Gather the hits of every sub-query into one flat list.
     hits = []
     for sub_text in return_search_results(query.rawQuery):
         sub_query = SearchQuery(type=SearchType.SENTENCES,
                                 terms=sub_text.split(" "),
                                 rawQuery=sub_text,
                                 k=500)
         hits.extend(self.other.search(sub_query).searchResultItems)

     # De-duplicate by sentence UUID; when a UUID repeats, the last hit
     # seen replaces the earlier one.
     unique_hits = {hit.sentenceId.uuidString: hit for hit in hits}
     results = list(unique_hits.values())

     comm_ids_list, temp = get_comm_ids(results)
     dictUUID = fetch_dataset(comm_ids_list, temp)
     inv_map = {v: k for k, v in dictUUID.items()}
     # One [raw query, dataset value] pair per fetched entry.
     toHannah = [[query.rawQuery, value] for value in dictUUID.values()]

     resultItemRet = SearchResult(uuid=aug.next(),
                                  searchQuery=query,
                                  searchResultItems=results,
                                  metadata=AnnotationMetadata(
                                      tool="search",
                                      timestamp=int(time.time())),
                                  lang="eng")

     # NOTE: unpickling executes code embedded in the file, so the model
     # file must come from a trusted source.  The ``with`` block makes
     # sure the file handle is closed once the model is loaded.
     with open("./trained_model.p", "rb") as model_file:
         model = pickle.load(model_file)

     pre = Preprocess()
     feature_matrix = pre.process_run(toHannah)
     dictRanks = pre_ranking(feature_matrix, model, toHannah, inv_map)
     reranked = rerank(dictRanks, resultItemRet)

     resultArr = sorted(reranked.searchResultItems,
                        key=lambda item: item.score,
                        reverse=True)
     for item in resultArr:
         logging.info(item.score)

     return SearchResult(uuid=aug.next(),
                         searchQuery=query,
                         searchResultItems=resultArr,
                         metadata=AnnotationMetadata(
                             tool="search",
                             timestamp=int(time.time())),
                         lang="eng")
def search_questions(host, port, question_texts, k):
    """
    Query a search service once per question and collect the answers.

    `host` and `port` locate the search service.  `question_texts` maps
    question IDs to question strings.  Each string is escaped with
    `escape_query` and sent as the raw query of a SENTENCES search
    asking for at most `k` hits.

    The returned dictionary maps every question ID to the SearchResult
    received for it.  Because a service may ignore the `k` carried by
    the query, `searchResultItems` of every result is also cut down to
    its first `k` entries before being stored, so it never holds more
    than `k` items.
    """
    answers = {}
    with SearchClientWrapper(host, port) as search_client:
        for question_id, question_text in question_texts.items():
            query = SearchQuery(type=SearchType.SENTENCES,
                                rawQuery=escape_query(question_text),
                                k=k)
            response = search_client.search(query)
            # Enforce the limit client-side as well.
            response.searchResultItems = response.searchResultItems[:k]
            answers[question_id] = response
    return answers
Exemplo n.º 3
0
def test_search_communications():
    """Send a COMMUNICATIONS query to a FooSearch service and check the reply.

    The service is run through SubprocessSearchServiceWrapper on a port
    obtained from ``find_port()``; the client talks to it over a framed
    socket transport with the compact protocol.
    """
    impl = FooSearch()
    host = 'localhost'
    port = find_port()
    timeout = 5

    terms = ['foo', 'bar']
    query = SearchQuery(type=SearchType.COMMUNICATIONS,
                        terms=list(terms))

    with SubprocessSearchServiceWrapper(impl, host, port, timeout=timeout):
        transport = TSocket.TSocket(host, port)
        transport = TTransport.TFramedTransport(transport)
        protocol = TCompactProtocol.TCompactProtocol(transport)

        cli = Search.Client(protocol)
        transport.open()
        try:
            res = cli.search(query)
        finally:
            # Always release the socket, even when the call fails, so the
            # connection is not left open while the wrapper shuts down.
            transport.close()

        assert res.uuid.uuidString == '12345678-1234-5678-1234-567812345678'
        assert len(res.searchResults) == 2
        assert res.searchResults[0].communicationId == 'foo'
        assert res.searchResults[0].score == 42.
        assert res.searchResults[1].communicationId == 'bar'
        assert res.searchResults[1].score == 42.
        assert res.metadata.tool == 'Foo Search'
Exemplo n.º 4
0
def kscore(s):
    """Print the baseline success@k of a searcher for k = 1, 10, 100, 1000.

    Labels are read from ``dev-match.tsv`` (column 3 is the key, column 4
    the label).  Questions are read from column 1 of
    ``WikiQA-dev.tsv.gz``, skipping its first row; each distinct cleaned
    question is searched once per k.

    For one question and one k, only returned items whose sentence UUID
    has a label are considered, and at most k of them.  The question
    counts as answerable when at least one such item exists, and as a
    success when the integer labels of those items sum to 1 or more.
    The printed value is successes / answerable questions, or ``n/a``
    when no question was answerable for that k.

    Args:
        s: object exposing ``search(query)`` whose return value has a
            ``searchResultItems`` iterable.
    """
    answer_labels = {}
    with open("dev-match.tsv") as match:
        reader = csv.reader(match, delimiter="\t", quotechar="'")
        for row in reader:
            answer_labels[row[3]] = row[4]

    k_vals = [1, 10, 100, 1000]
    # k -> [number of successes, number of answerable questions]
    k_val_dict = {k_val: [0, 0] for k_val in k_vals}
    used = set()

    with gzip.open("WikiQA-dev.tsv.gz", 'rt') as wiki:
        reader = csv.reader(wiki, delimiter="\t", quotechar="'")
        next(reader)  # skip the first (header) row
        for row in reader:
            print(row)
            query = row[1]
            query = query.replace('"', "")
            query = query.replace("/", " ")
            query = query.replace("?", "")
            if query in used:
                # The same question appears on several rows; search once.
                continue
            used.add(query)

            terms = query.split(" ")
            for k_val in k_vals:
                query1 = SearchQuery(type=SearchType.SENTENCES,
                                     terms=terms,
                                     k=k_val,
                                     rawQuery=query)
                results = s.search(query1)
                atK = 0
                totCorrect = 0
                hasAnswerInMatch = False
                for result in results.searchResultItems:
                    if atK == k_val:
                        break
                    label = answer_labels.get(result.sentenceId.uuidString)
                    if label is None:
                        # Unlabelled hits do not use up one of the k slots.
                        continue
                    atK += 1
                    totCorrect += int(label)
                    hasAnswerInMatch = True

                if totCorrect >= 1:
                    k_val_dict[k_val][0] += 1
                if hasAnswerInMatch:
                    k_val_dict[k_val][1] += 1

    print("Baseline success @k")
    for k_val in k_vals:
        successes, answerable = k_val_dict[k_val]
        # Guard against a zero denominator when nothing was answerable.
        rate = successes / answerable if answerable else "n/a"
        print("{}: {}".format(k_val, rate))
Exemplo n.º 5
0
 def search(self, string):
     """Clean ``string`` and run it as a 500-hit SENTENCES search.

     Double quotes and question marks are removed and slashes become
     spaces; the cleaned text is split on single spaces to form the
     query terms and is also used as the raw query.  The query is
     handed to a SearchHandler built on a client connected to
     172.18.0.4:9090, and the handler's result is returned.
     """
     for old, new in (('"', ""), ("/", " "), ("?", "")):
         string = string.replace(old, new)
     terms = string.split(" ")
     with SearchClientWrapper("172.18.0.4", 9090) as search_client:
         handler = SearchHandler(search_client, "wikiQA", "", "")
         sentence_query = SearchQuery(type=SearchType.SENTENCES,
                                      terms=terms,
                                      k=500,
                                      rawQuery=string)
         return handler.search(sentence_query)
Exemplo n.º 6
0
def main():
    """Interactive prompt that sends each typed line to a search service.

    Command-line arguments give the host and port of the service, an
    optional URL template used to fetch the text of each hit over HTTP,
    and an optional user id forwarded in every query.  Every non-empty
    input line is split on whitespace into query terms; for each hit
    either the fetched text or the communication id is written to
    stdout as UTF-8.  The loop ends on end-of-input.
    """
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Interface with a Concrete Search service')
    parser.add_argument(
        'host',
        help='Hostname of search service to which to'
        ' connect.')
    parser.add_argument(
        'port',
        type=int,
        help='Port of search service to which to connect.')
    parser.add_argument(
        '--http-lookup-url',
        type=str,
        help='Look up result communication text from HTTP '
        'service via provided URL template, for '
        'example, http://localhost:3000/comm/id/%%s')
    parser.add_argument(
        '--user-id',
        type=str,
        help='user id to send to search service')
    concrete.version.add_argparse_argument(parser)
    ns = parser.parse_args()

    out_f = codecs.getwriter('utf-8')(sys.stdout)

    with SearchClientWrapper(ns.host, ns.port) as client:
        while True:
            try:
                line = raw_input('> ').strip().decode('utf-8')
            except EOFError:
                # Bare print kept as in the original (a Python 2 print
                # statement, matching the raw_input/decode usage above).
                print
                break
            if not line:
                # Nothing typed; prompt again.
                continue
            query = SearchQuery(terms=line.split(),
                                type=SearchType.COMMUNICATIONS,
                                userId=ns.user_id)
            for result in client.search(query).searchResults:
                if ns.http_lookup_url:
                    lookup_url = ns.http_lookup_url % result.communicationId
                    text = requests.get(lookup_url).text
                else:
                    text = result.communicationId
                out_f.write(text + u'\n')
Exemplo n.º 7
0
def execute_search_query(search_client, terms, k):
    """Run a COMMUNICATIONS search and return (communication id, score) pairs.

    The space-joined terms are logged at debug level, then a query built
    from ``terms`` and ``k`` is sent through ``search_client``.  One
    tuple is returned per item in the result's ``searchResultItems``,
    in the order received.
    """
    joined_terms = u' '.join(terms)
    logging.debug("executing query '{}'".format(joined_terms))
    response = search_client.search(
        SearchQuery(type=SearchType.COMMUNICATIONS, terms=terms, k=k))
    scored_ids = []
    for hit in response.searchResultItems:
        scored_ids.append((hit.communicationId, hit.score))
    return scored_ids