Python TFIDF_cal.target_words_extract примеры использования

Язык программирования: Python

Класс/Тип: TFIDF_cal

Метод/Функция: target_words_extract

Примеров на hotexamples.com: 2

Python TFIDF_cal.target_words_extract — найдено 2 примера. Это лучшие примеры Python-кода для TFIDF_cal.target_words_extract, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

cleanStrings(3)

filter_result(2)

target_words_extract(2)

cal_tfidf(1)

mathlog(1)

normalize(1)

rank_tfidfMatrix(1)

remove_punctuation(1)

Пример #1

Показать файл

Файл: DB_Search.py Проект: nikign/Git-Helper

def main_search(Query, WebQuery=None, EmailQuery=None, Path=None):
    """Rank the links stored in ``DumpDB.csv`` against ``Query``.

    The CSV at ``Path + '/DumpDB.csv'`` is loaded with pandas. Its
    ``'1311WebLinks'`` column supplies the candidate links. The query is
    cleaned and split into words by ``SortUtils``; only words that are also
    column labels of the CSV are kept, and those columns are handed to
    ``SortUtils.rank_tfidfMatrix`` to obtain the ordering applied to the
    links.

    Args:
        Query: Raw query string.
        WebQuery: When truthy, the function returns ``None``.
        EmailQuery: When truthy (and ``WebQuery`` is falsy), the first
            ranked link is returned.
        Path: Directory containing ``DumpDB.csv``. Required in practice
            despite the ``None`` default.

    Returns:
        The first ranked link for an email query, otherwise ``None``.

    Raises:
        TypeError: If ``Path`` is ``None``.
    """
    if Path is None:
        # Same exception type the string concatenation below would raise,
        # but with a message that names the actual problem.
        raise TypeError("Path must be the directory containing DumpDB.csv")

    data = pd.read_csv(Path + '/DumpDB.csv')
    Links = data['1311WebLinks']

    [Query_clean] = SortUtils.cleanStrings([Query])
    QueryChars = SortUtils.target_words_extract(Query_clean)

    # ``DataFrame.columns`` is the label index; the previous ``data.column``
    # is not a DataFrame attribute, so the filter could never work as meant.
    QueryChars = [c for c in QueryChars if c in data.columns]

    QueryTFIDFMatrix = data[QueryChars]
    RankResult_index = SortUtils.rank_tfidfMatrix(QueryTFIDFMatrix, 0)

    [sorted_Links] = SortUtils.filter_result([Links], RankResult_index)

    if WebQuery:
        return None
    if EmailQuery:
        return sorted_Links[0]
    return None

Пример #2

Показать файл

Файл: googlesearch.py Проект: nikign/Git-Helper

def main_search(Query, WebQuery=None, EmailQuery=None):
    """Search the web for ``Query`` and return the re-ranked results.

    Links come from ``google_search_engine`` and are scraped with
    ``scrape_webs``. Each scraped question/answer pair is joined into one
    document and added to a ``tfidf.tfidf()`` table; the table's similarity
    to the query words is normalized and blended with the normalized
    log-votes (weights 0.6 and 0.4) to produce the final ordering.

    Returns:
        The re-ordered web results when ``WebQuery`` is truthy; otherwise
        the first re-ordered link when ``EmailQuery`` is truthy; otherwise
        ``None``.
    """
    Links = google_search_engine(Query)
    (Links_RemoveEmpty, votes, questions, answers, WebResult) = scrape_webs(Links)

    log("Orignal link:")
    log(Links_RemoveEmpty)

    # Split the cleaned query into its target words.
    (Query_clean,) = SortUtils.cleanStrings([Query])
    QueryChars = SortUtils.target_words_extract(Query_clean)

    questions = SortUtils.cleanStrings(questions)
    answers = SortUtils.cleanStrings(answers)
    documents = ["%s %s" % pair for pair in zip(questions, answers)]

    # Register every document in the tf-idf table under a 1-based string id.
    Tfidf_table = tfidf.tfidf()
    for doc_number, text in enumerate(documents, 1):
        words = SortUtils.target_words_extract(SortUtils.remove_punctuation(text))
        ascii_words = [w.encode("ascii", "ignore") for w in words]
        Tfidf_table.addDocument(str(doc_number), ascii_words)

    # Second element of each similarity entry is the score.
    raw_scores = [entry[1] for entry in Tfidf_table.similarities(QueryChars)]
    similarity = SortUtils.normalize(raw_scores)
    votes = SortUtils.normalize(SortUtils.mathlog(votes))

    # Blend similarity and votes, then order indices from best to worst.
    fit = [sim * 0.6 + que * 0.4 for sim, que in zip(similarity, votes)]
    order_by_fit = sorted(range(len(fit)), key=fit.__getitem__, reverse=True)
    order_by_sim = sorted(
        range(len(similarity)), key=similarity.__getitem__, reverse=True
    )

    log("sort by similarity")
    log(order_by_sim)
    log("votes")
    log(votes)
    log("sort by Fitvalue")
    log(order_by_fit)

    (ranked_links, ranked_results) = SortUtils.filter_result(
        [Links_RemoveEmpty, WebResult], order_by_fit
    )

    log("new sorted links")
    log(ranked_links)
    log("sorted_WebResult")
    log(ranked_results)
    log(WebQuery)

    if WebQuery:
        return ranked_results
    if EmailQuery:
        return ranked_links[0]
    return None