Example #1
def check_courses_similarity():
    content = extract_contents(request, 'compute')

    # fetch all courses, applying the optional SQL-style filter from the request
    courses = get_all_data(filters=content.get('filter', ''))

    # calculate similarity
    courses_json_similarities = compute_similarity(content['input'], courses,
                                                   lsi_model, lsa_index)

    # build the response in the requested format (default: json), keeping the top three matches
    output = generate_response(
        content.get('output', {
            'format': 'json'
        }).get('format'), courses_json_similarities[:3])

    return output
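
For reference, a request body this handler would accept might look like the sketch below; the key names input, filter, and output.format come from the handler itself, while the values are purely illustrative.

# Illustrative payload for check_courses_similarity; only the key names
# are taken from the handler above, the values are made up.
example_payload = {
    'input': 'introduction to machine learning',  # free-text query to rank against
    'filter': "department = 'CS'",                # optional SQL-style filter
    'output': {'format': 'json'},                 # response format (default: json)
}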
Example #2
from typing import List

def compare_images(imageUrls: List[ImageSimilarityRequest]):
    result = compute_similarity(imageUrls)
    return result
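
The ImageSimilarityRequest type is not shown in this example. Assuming a FastAPI/pydantic setup, which the type-annotated signature suggests, a minimal model consistent with it might look like this; the field names are hypothetical.

# Hypothetical request model; the real ImageSimilarityRequest is not shown.
from pydantic import BaseModel

class ImageSimilarityRequest(BaseModel):
    source_url: str  # assumed fields: URLs of the two images to compare
    target_url: str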
Example #3
# from scrape import create_transcript_files
# from preprocess import create_docterm_matrix
from similarity import compute_similarity
import argparse
'''
Potential future improvements:
    Store the csv files in a database so they don't take so long to load
    Build a website with a UI
    Add user accounts in order to implement collaborative filtering
'''

# comment out when scraping is complete
# create_transcript_files()

# comment out when the vector files have been created
# create_docterm_matrix()

# parse the command line for the title flag and prompt the user for keywords/title
parser = argparse.ArgumentParser(description='Comedy RecSys')
parser.add_argument('-T',
                    action='store_true',
                    help='Search by title instead of by keyword query')
args = parser.parse_args()
prompt = '\nEnter name of a special: ' if args.T else '\nEnter search query: '
query = input(prompt)

# compute results of query and display
compute_similarity(query, args.T)
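
Assuming the script is saved as recsys.py (the filename is not given in the source), typical invocations would be:

python recsys.py        # prompts for a search query
python recsys.py -T     # prompts for the title of a special instead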
Example #4
def fit(self, data_df, args):
    if Global.IN_SAMPLE_TESTING:
        self.actual_train_nearest_neighbors = self.find_nearest_neighbors(data_df)
    # prepare inputs and training params #
    X = torch.tensor(np.array(data_df)).float()
    N = X.shape[0]
    self.train()
    epochs = args['epochs']
    criterion = InvLoss(args['lambda'])
    optimizer = optim.Adam(self.parameters(), lr=args['learning_rate'])
    # store the minimum squared distance (and its partner index) per point #
    min_dist_dict = {i: None for i in range(N)}
    self.out('\nFitting the model...')
    losses = []
    for epoch in range(epochs):
        running_loss = 0
        self.out('EPOCH: {}'.format(epoch + 1))
        for i in self.tqdm(np.random.permutation(N)):
            input1 = X[i]
            # draw a random partner j, distinct from i #
            j = i
            while j == i:
                j = random.randint(0, N - 1)
            input2 = X[j]
            # minimum squared distances seen so far, if any #
            min_dist_square_i = min_dist_dict[i][0] if min_dist_dict[i] is not None else None
            min_dist_square_j = min_dist_dict[j][0] if min_dist_dict[j] is not None else None
            # compute similarities in both directions and average #
            sim_i, dist_square = compute_similarity(
                data_df.iloc[i], data_df.iloc[j], min_dist_square_i)
            sim_j, _ = compute_similarity(
                data_df.iloc[j], data_df.iloc[i], min_dist_square_j)
            sim = ((sim_i + sim_j) / 2).reshape(1)
            # pass both inputs through the model #
            output1 = self.forward(input1)
            output2 = self.forward(input2)
            # update the stored minimum distances #
            if min_dist_dict[i] is None or dist_square < min_dist_dict[i][0]:
                min_dist_dict[i] = (dist_square, j)
            if min_dist_dict[j] is None or dist_square < min_dist_dict[j][0]:
                min_dist_dict[j] = (dist_square, i)
            # compute loss and backpropagate #
            loss = criterion(output1, output2, sim)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        self.out('Train loss: {:.2f}'.format(running_loss))
        losses.append(running_loss)
        # evaluate in-sample after every epoch #
        if Global.IN_SAMPLE_TESTING:
            self.count_neighbors(data_df, test=False)
    # plot loss per epoch #
    if args['to_plot']:
        losses = np.array(losses)
        self.create_epoch_plot(losses, filename='loss')
    return self
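
A minimal usage sketch, using only the hyperparameter keys that fit() actually reads (epochs, lambda, learning_rate, to_plot); the model instance and the values shown are placeholders, not from the source.

# Hypothetical call; 'model' stands for an instance of the (unshown) class
# that defines fit(), and every value below is illustrative.
args = {
    'epochs': 10,           # number of passes over the data
    'lambda': 0.5,          # weight handed to InvLoss
    'learning_rate': 1e-3,  # Adam step size
    'to_plot': False,       # skip the loss-per-epoch plot
}
model.fit(data_df, args)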
Example #5
def process_query(query):
    # tokenize, drop stopwords, and POS-tag the query
    query_tokens = nltk.word_tokenize(query)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    filtered_query = [w for w in query_tokens if w not in stop_words]
    tagged_query = nltk.pos_tag(filtered_query)
    # bucket the tagged tokens by part of speech
    proper_nouns = []
    nouns = []
    verbs = []
    adjectives = []
    for word, tag in tagged_query:
        if tag == 'NNP':
            proper_nouns.append(word)
        if tag in ('NN', 'NNS'):
            nouns.append(word)
        if 'J' in tag:
            adjectives.append(word)
        if 'V' in tag:
            verbs.append(word)
    # t_word is initialized here so the filtering below works even
    # when no similarity lookup succeeds
    target, t_word = 'null', None
    print()

    # resolve the query intent: try adjectives first, then nouns, then verbs
    if adjectives:
        (target, t_word) = similarity.compute_similarity(adjectives)
    if target == 'null' and nouns:
        (target, t_word) = similarity.compute_similarity(nouns)
    if target == 'null' and verbs:
        (target, t_word) = similarity.compute_similarity(verbs)

    # words of interest: every extracted word except the matched trigger word
    roi = [x for x in adjectives + nouns + verbs + proper_nouns if x != t_word]

    if target == 'f':
        # weather forecast query: fall back to the remaining words
        # when no proper noun was recognized as the place
        place = proper_nouns if proper_nouns else roi
        print("Place:", place[0])
        location = weather.lookup_by_location(str(place[0]))
        condition = location.condition()
        forecast = location.forecast()
        print("Date:", forecast[0].date())
        print("Condition:", forecast[0].text())
        print("Current:", pytemperature.f2c(int(condition.temp())), "C")
        print("Highest:", pytemperature.f2c(int(forecast[0].high())), "C")
        print("Lowest:", pytemperature.f2c(int(forecast[0].low())), "C")

    elif target in ('h', 'c', 't'):
        # current temperature query
        place = proper_nouns if proper_nouns else roi
        print("Place:", place[0])
        location = weather.lookup_by_location(str(place[0]))
        condition = location.condition()
        print("In " + place[0] + ", the temperature is:",
              pytemperature.f2c(int(condition.temp())), "C")

    elif target == 'd':
        # definition query: print dictionary meanings, then a wikipedia summary
        definitions = dictionary.meaning(str(roi[0]))
        for k, v in definitions.items():
            for meaning in v:
                print(meaning)
        print(wikipedia.summary(str(roi[0])))

    elif target == 's':
        # synonym query
        for s in dictionary.synonym(str(roi[0])):
            print(s)
    elif target == 'a':
        # antonym query
        for a in dictionary.antonym(str(roi[0])):
            print(a)
    else:
        print("Can't process query")
        exit(0)
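
Example calls, for orientation only: which branch fires depends on similarity.compute_similarity, which is not shown, and the output depends on the live weather and dictionary services.

# Illustrative invocations; routing and output depend on external pieces
# (similarity.compute_similarity, the weather and dictionary services).
process_query("What is the forecast for London")  # presumably the 'f' branch
process_query("define serendipity")               # presumably the 'd' branch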