Exemplo n.º 1
0
def main_search(Query, WebQuery=None, EmailQuery=None):
    """
    Given a query, return our search result.

    Pipeline, as implemented below:

    1. Fetch candidate links with ``google_search_engine(Query)`` and pass
       them to ``scrape_webs``, which yields five parallel sequences
       (links, question votes, question content, answer content, web result).
    2. Clean the query and the scraped question/answer text, then join each
       question with its answer into one document.
    3. Register every document in a ``tfidf.tfidf()`` table and score the
       query words against it.
    4. Blend the normalized similarity (weight 0.6) with the normalized
       log of the question votes (weight 0.4) and rank by that value,
       highest first.

    Args:
        Query: the search query; it is cleaned with
            ``SortUtils.cleanStrings`` before word extraction.
        WebQuery: when truthy, the ranked web results are returned.
        EmailQuery: when truthy (and ``WebQuery`` is falsy), only the
            top-ranked link is returned.

    Returns:
        * the ranked web results if ``WebQuery`` is truthy;
        * otherwise the top-ranked link if ``EmailQuery`` is truthy, or
          ``None`` when the ranked link list is empty;
        * ``None`` if neither flag is set.
    """
    # Relative weights of text similarity and question votes in the ranking.
    similarity_weight = 0.6
    votes_weight = 0.4

    Links = google_search_engine(Query)

    # NOTE(review): presumably Links_RemoveEmpty is Links minus the pages that
    # yielded nothing -- confirm against scrape_webs.
    (
        Links_RemoveEmpty,
        QuestionVotes,
        QuestionContent,
        AnswerContent,
        WebResult,
    ) = scrape_webs(Links)

    log("Orignal link:")
    log(Links_RemoveEmpty)

    # Clean the query and extract the words used for the similarity lookup.
    (Query_clean,) = SortUtils.cleanStrings([Query])
    QueryChars = SortUtils.target_words_extract(Query_clean)

    QuestionContent = SortUtils.cleanStrings(QuestionContent)
    AnswerContent = SortUtils.cleanStrings(AnswerContent)
    # One document per question/answer pair; zip stops at the shorter list.
    QuestionAndAnswerContent = [
        "%s %s" % (question, answer)
        for question, answer in zip(QuestionContent, AnswerContent)
    ]

    # Build the tf-idf table; documents are named "1", "2", ... in input order.
    Tfidf_table = tfidf.tfidf()
    for index, content in enumerate(QuestionAndAnswerContent, 1):
        content_remove_punc = SortUtils.remove_punctuation(content)
        content_words_list = SortUtils.target_words_extract(content_remove_punc)
        # Drop any non-ASCII characters before indexing.
        content_words_list = [word.encode("ascii", "ignore") for word in content_words_list]
        Tfidf_table.addDocument(str(index), content_words_list)

    # Each result is indexable; element [1] is taken as the score.
    SimilaritiesResult = [Result[1] for Result in Tfidf_table.similarities(QueryChars)]
    SimilaritiesResult = SortUtils.normalize(SimilaritiesResult)

    QuestionVotes = SortUtils.normalize(SortUtils.mathlog(QuestionVotes))

    FitValue = [
        sim * similarity_weight + que * votes_weight
        for sim, que in zip(SimilaritiesResult, QuestionVotes)
    ]
    # Positions of the documents, best first. The similarity-only ranking is
    # computed for logging/diagnostics only; results are ordered by FitValue.
    Index_sortedby_fit = sorted(range(len(FitValue)), key=lambda k: FitValue[k], reverse=True)
    Index_sortedby_sim = sorted(
        range(len(SimilaritiesResult)), key=lambda k: SimilaritiesResult[k], reverse=True
    )
    log("sort by similarity")
    log(Index_sortedby_sim)
    log("votes")
    log(QuestionVotes)
    log("sort by Fitvalue")
    log(Index_sortedby_fit)

    sorted_Links_RemoveEmpty, sorted_WebResult = SortUtils.filter_result(
        [Links_RemoveEmpty, WebResult], Index_sortedby_fit
    )
    log("new sorted links")
    log(sorted_Links_RemoveEmpty)

    log("sorted_WebResult")
    log(sorted_WebResult)
    log(WebQuery)

    if WebQuery:
        return sorted_WebResult
    if EmailQuery:
        # An empty ranking has no "best" link; report that as None instead of
        # letting an IndexError escape to the caller.
        try:
            return sorted_Links_RemoveEmpty[0]
        except IndexError:
            return None
    return None