Пример #1
0
def test_query(u, v, t, input_list, dimen, no_similar, a, **pca):
    """Return indices of the stored vectors closest to a query sentence.

    The query text is cleaned, split on spaces, stripped of English stop
    words and embedded as a weighted sum of its Word2Vec vectors, each word
    weighted by ``a / (a + t[word] / t.N())``.  That embedding is compared
    by cosine distance with every row loaded from ``word2vec_value.npy``.

    Args:
        u: Unused here; kept so existing callers keep working.
        v: Unused here; kept so existing callers keep working.
        t: Frequency table indexable by word and exposing ``N()``
            (presumably an nltk ``FreqDist`` -- confirm against caller).
        input_list: Sequence whose item at index 2 is the query text.
        dimen: Length of the sentence vector.
        no_similar: Number of nearest rows to return.
        a: Smoothing constant of the word weight.
        **pca: Optional flags.  ``pca='Yes'`` applies the ``u_ut``
            correction to the sentence vector.

    Returns:
        numpy array holding the indices (in no particular order) of the
        ``no_similar`` rows with the smallest cosine distance to the query,
        or the indices of all rows when fewer than ``no_similar`` exist.
    """
    stop_words = set(stopwords.words('english'))

    word2vec_value = np.load('word2vec_value.npy')
    model = Word2Vec.load('model_word2vec.bin')

    # Only the query text is embedded; the other input_list entries are
    # not used by this function.
    sent = data_cleaner.sentence_cleaner(input_list[2])

    sent_value = np.zeros(dimen)
    count = 0

    for word in sent.split(" "):
        if word in stop_words:
            continue
        # Every non-stop word counts towards the average, including words
        # for which no vector can be added below.
        count += 1
        try:
            # float() keeps the relative frequency fractional when both
            # counts are integers and the interpreter truncates int / int.
            factor = a / (a + t[word] / float(t.N()))
            sent_value += model[word] * factor
        except (KeyError, ZeroDivisionError):
            # No vector for this word (or a degenerate weight): the word
            # simply contributes nothing to the sentence vector.
            continue

    # Skip the division when every word was a stop word (avoids 0 / 0).
    if count:
        sent_value = sent_value / count

    # ``pca`` is the dict of extra keyword arguments, so the flag has to be
    # read from it; comparing the dict itself with 'Yes' is never true.
    if pca.get('pca') == 'Yes':
        # NOTE(review): u_ut is not defined in this function; it has to be
        # available at module level for this branch to work -- confirm.
        sent_value = sent_value - u_ut * np.array(sent_value)

    all_dist = [
        scipy.spatial.distance.cosine(row, sent_value)
        for row in word2vec_value
    ]

    # argpartition requires k < len(all_dist); when at least as many
    # neighbours are requested as rows exist, every row qualifies.
    k = min(no_similar, len(all_dist))
    if k == len(all_dist):
        return np.arange(k)

    return np.argpartition(all_dist, k)[:k]
Пример #2
0
# Interactive front end: ask for the question and the user's location.
# Each print() below takes a single argument, so it behaves the same as the
# Python 2 print statement.
print('\nSo %s, How can I help you?' % name)
query = raw_input('')

print('\nSure, which district are you from?')
district = raw_input('')

print('\nAnd which state?')
state = raw_input('')

print('\nGive me a second\n\n')


#### Answer #####
# input_list receives the lower-cased query *before* it is cleaned; only
# the `query` variable itself holds the cleaned text afterwards.
query = query.lower()
input_list = [district, state, query]
query = data_cleaner.sentence_cleaner(query)


# Weather questions are answered straight from the weather API, first for
# today and, if the user agrees, for the whole week.
if 'weather' in query:
    weat = weather_api.daily(district + ',' + state)
    print('%s\n' % weat)
    # NOTE(review): the prompt below reads "like to the weather" -- a verb
    # seems to be missing; left unchanged because it is user-facing text.
    print('Would you like to the weather forecast for the coming week ?\n')
    week = raw_input('').lower()
    if week == 'yes':
        weat = weather_api.weekly(district + ',' + state)
        print(weat)

    print('\nThank You for chatting')
    print('\nFor further information, contact KCC')
import ast

# The state is fixed for this test run instead of being read from the user.
print('\nAnd which state?')
state = 'maharashtra'  #raw_input('')

print('\nGive me a second\n\n')

#### test #####

# Queries and answers retrieved for the current query, filled in below.
predicted_ans_list = []
predicted_query_list = []

# Compare case-insensitively so that e.g. "Weather" is recognised as well;
# the query is only lower-cased inside the else branch.
if 'weather' in query.lower():
    print('weather query not to be tested')
else:
    query = query.lower()
    query_t = data_cleaner.sentence_cleaner(query)
    input_list = [district, state, query_t]
    print(input_list)

    u, v, t, new_maharashtra, maharashtra = wv.pre('all_files.csv')
    ind = wv.test_query(u, v, t, input_list, dimen, k, a, pca=pca_text)

    # wv.print_ans(ind, maharashtra, k)

    # reset_index() so that maharashtra['Query'][i] / ['Ans'][i] are looked
    # up by the fresh 0..n-1 labels.
    maharashtra = maharashtra.reset_index()
    for i in ind:
        predicted_query_list.append(maharashtra['Query'][i])
        # print 'Answer: %s\n\n'%(maharashtra['Ans'][i])
        # The 'Ans' cell is turned into text and parsed as a Python literal.
        # ast.literal_eval only accepts literals, so text coming from the
        # CSV can no longer execute arbitrary code the way exec() allowed.
        ans_text = '%s' % (maharashtra['Ans'][i],)
        mah_list = ast.literal_eval(ans_text.strip())
        predicted_ans_list.extend(mah_list)
Пример #4
0
    # Fragment: the enclosing definition/loop header and whatever follows
    # the last line are outside the visible source.

    # Lower-case the query before it is matched.
    query = query.lower()
    # NOTE(review): input_list is not used in the visible part of this
    # fragment -- confirm whether later code needs it.
    input_list = [district, state, query]

    # Candidate indices for the query; the model and the stored vectors are
    # passed in explicitly, and the PCA flag goes in as a keyword argument.
    ind = wv.test_metric(u,
                         v,
                         t,
                         query,
                         dimen,
                         k,
                         a,
                         model,
                         word2vec_value,
                         pca=pca_text)
    # Copy of the data with a fresh index, used for the lookups below.
    pdf = maharashtra.reset_index()

    # wv.entity returns a position into `ind` (it is used as ind[fin_index]
    # below), given the cleaned query and the data frame.
    query_list = data_cleaner.sentence_cleaner(query)
    fin_index = wv.entity(ind, query_list, pdf)

    # Score the raw (lower-cased) query against the stored query selected
    # by fin_index, with both similarity measures.
    lesk_score = similarity.compute_lesk_score(query,
                                               pdf['Query'][ind[fin_index]])
    jaccard_score = similarity.compute_jaccard_sim(
        query, pdf['Query'][ind[fin_index]])

    # print pdf['Query'][ind[fin_index]]

    # Tally how often the Lesk score exceeds the configured threshold ...
    if lesk_score > threshold_lesk:
        count_lesk_threshold += 1

    # ... and how often it is positive at all.
    if lesk_score > 0:
        count_lesk += 1