def compute_Semantics_2c():
    """Tensor decomposition on the actor-movie-year tensor; put actors,
    movies and years into non-overlapping bins of latent semantics."""
    print("\n\n")

    # Map tensor indices to the distinct actors, years and movies.
    actor_dict = {}
    act = MovieActor.objects.values_list('actorid', flat=True).distinct()
    actor_count = act.count()
    for n, each in enumerate(act):
        actor_dict[n] = each

    year_dict = {}
    yr = MlMovies.objects.values_list('year', flat=True).distinct()
    year_count = yr.count()
    for n, each in enumerate(yr):
        year_dict[n] = each

    movie_dict = {}
    mov = MlMovies.objects.values_list('movieid', flat=True).distinct()
    movie_count = mov.count()
    for n, each in enumerate(mov):
        movie_dict[n] = each

    # Human-readable names for the printed bins.
    actorobjs = ImdbActorInfo.objects.values_list('actorid', 'name')
    actor_mapping = {x[0]: x[1] for x in actorobjs}
    movieobjs = MlMovies.objects.values_list('movieid', 'moviename')
    movie_mapping = {x[0]: x[1] for x in movieobjs}

    # (Optional) the index dicts can be dumped to CSV under tag_space_matrix/
    # for debugging.

    # Build the binary actor x year x movie occurrence tensor.
    results = [[[0] * movie_count for _ in range(year_count)]
               for _ in range(actor_count)]
    whole_table = MovieActor.objects.select_related('movieid').all()
    inv_a = {v: k for k, v in actor_dict.items()}
    inv_m = {v: k for k, v in movie_dict.items()}
    inv_y = {v: k for k, v in year_dict.items()}
    for row in whole_table:
        results[inv_a[row.actorid.actorid]][inv_y[row.movieid.year]][
            inv_m[row.movieid.movieid]] = 1.0

    tensor = T.tensor(np.array(results))
    # Rank-5 CP (PARAFAC) decomposition: one factor matrix per mode.
    factors = tensorly.decomposition.parafac(tensor, 5)

    def print_bins(factor, label_for_index):
        """Assign each row of `factor` to the latent semantic (column) on
        which it loads most strongly; print each bin in descending strength."""
        bins = [[] for _ in range(5)]
        for i, row in enumerate(factor):
            bins[np.argmax(row)].append([label_for_index(i), np.max(row)])
        for b, members in enumerate(bins):
            print("LATENT SEMANTIC %d" % (b + 1))
            for entry in sorted(members, key=lambda e: e[1], reverse=True):
                print(entry)

    # ACTOR SEMANTICS: min-max normalise each column of the mode-0 factor.
    x = factors[0].asnumpy()
    factors[0] = (x - x.min(0)) / x.ptp(0)
    print_bins(factors[0], lambda i: actor_mapping[actor_dict[i]])

    # MOVIE SEMANTICS (mode 2 of the actor x year x movie tensor).
    x = factors[2].asnumpy()
    factors[2] = (x - x.min(0)) / x.ptp(0)
    print_bins(factors[2], lambda i: movie_mapping[movie_dict[i]])

    # YEAR SEMANTICS (mode 1).
    x = factors[1].asnumpy()
    factors[1] = (x - x.min(0)) / x.ptp(0)
    print_bins(factors[1], lambda i: year_dict[i])
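# A minimal, self-contained sketch of the same CP bin-assignment idea on a
# random tensor, with no Django models. The toy shape, the integer labels and
# the numpy backend are illustrative assumptions; it also assumes a tensorly
# 0.x-style parafac that returns a plain list of factor matrices, as the
# function above does.
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac

def demo_cp_bins(rank=5):
    rng = np.random.RandomState(0)
    toy = tl.tensor(rng.rand(20, 6, 30))  # e.g. actor x year x movie
    factors = parafac(toy, rank)
    actor_factor = tl.to_numpy(factors[0])
    # Column-wise min-max scaling, then bin each row by its strongest column.
    norm = (actor_factor - actor_factor.min(0)) / actor_factor.ptp(0)
    bins = [[] for _ in range(rank)]
    for i, row in enumerate(norm):
        bins[np.argmax(row)].append((i, float(np.max(row))))
    for b, members in enumerate(bins):
        print("LATENT SEMANTIC %d" % (b + 1))
        print(sorted(members, key=lambda e: e[1], reverse=True))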
def compute_Semantics_1c(userid):
    """Tensor decomposition on the user-movie-rating tensor; score unwatched
    movies for `userid` from the reconstructed tensor."""
    print("\n\n")
    user_limit = 80000
    usr_obj = MlUsers.objects.filter(userid__gte=user_limit).distinct()
    mov_obj = MlMovies.objects.filter(year__gte=2004).distinct()

    # Movies this user has already rated, to exclude from recommendations.
    setMovies = MlRatings.objects.filter(movieid__in=mov_obj,
                                         userid=userid).values_list("movieid")
    setMovies = list(set([mov[0] for mov in setMovies]))

    # Map tensor indices to the distinct rating values, users and movies.
    rating_dict = {}
    ratinglist = MlRatings.objects.values_list('rating', flat=True).distinct()
    rating_count = ratinglist.count()
    for n, each in enumerate(ratinglist):
        rating_dict[n] = each

    user_dict = {}
    user = MlRatings.objects.filter(userid__in=usr_obj).values_list(
        'userid', flat=True).distinct()
    user_count = user.count()
    for n, each in enumerate(user):
        user_dict[n] = each

    movie_dict = {}
    mov = MlRatings.objects.filter(movieid__in=mov_obj,
                                   userid__in=usr_obj).values_list(
                                       'movieid', flat=True).distinct()
    movie_count = mov.count()
    for n, each in enumerate(mov):
        movie_dict[n] = each

    movieobjs = MlMovies.objects.filter(year__gte=2004).values_list(
        'movieid', 'moviename')
    movie_mapping = {x[0]: x[1] for x in movieobjs}

    print(rating_count)
    print(movie_count)
    print(user_count)

    # Build the user x movie x rating tensor, weighted by normalised weight.
    results = [[[0] * rating_count for _ in range(movie_count)]
               for _ in range(user_count)]
    whole_table = MlRatings.objects.filter(movieid__in=mov_obj,
                                           userid__in=usr_obj)
    inv_u = {v: k for k, v in user_dict.items()}
    inv_m = {v: k for k, v in movie_dict.items()}
    inv_r = {v: k for k, v in rating_dict.items()}
    index = inv_u[userid]
    for row in whole_table:
        results[inv_u[row.userid.userid]][inv_m[row.movieid.movieid]][
            inv_r[row.rating]] = 1.0 * row.norm_weight

    tensor = T.tensor(np.array(results))
    # Rank-3 CP decomposition, then reconstruct the dense tensor.
    factors = tensorly.decomposition.parafac(tensor, 3)
    recons = tensorly.kruskal_to_tensor(factors)
    # tested for (25, 88) good, (30, 123) bad, (150, 497) okay-ish

    movie_score = {}
    user_movie_list = []
    print("user: " + str(user_dict[index]))
    for movie in range(len(recons[index])):
        # Expected-rating style score: sum over the rating axis of
        # (rating value) x (reconstructed weight), positive entries only.
        for rating in range(len(recons[index][movie])):
            if recons[index][movie][rating].asscalar() > 0.0:
                if movie in movie_score:
                    movie_score[movie] += float(rating + 1) * float(
                        recons[index][movie][rating].asscalar())
                else:
                    movie_score[movie] = float(rating + 1) * float(
                        recons[index][movie][rating].asscalar())
        if movie not in movie_score:
            movie_score[movie] = 0.0
        if movie_dict[movie] not in setMovies:
            user_movie_list.append((movie_dict[movie], movie_score[movie]))

    breakFlag = True
    user_movie_dict = dict(user_movie_list)

    print("Watched Movies:")
    rows = MlRatings.objects.all().filter(userid=user_dict[index])
    for row in rows:
        print(row.movieid.movieid, row.movieid.moviename, row.movieid.genres)
    return user_movie_dict

    # Unreachable due to the early return above: interactive
    # relevance-feedback re-ranking of the recommendations.
    user_movie_list = list(user_movie_dict.items())
    result = list(reversed(sorted(user_movie_list, key=lambda x: x[1])))
    till_which = 5
    print("Watched Movies:")
    rows = MlRatings.objects.all().filter(userid=user_dict[index])
    for row in rows:
        print(row.movieid.movieid, row.movieid.moviename, row.movieid.genres)
    print("Recommended:")
    for a, b in result[:till_which]:
        mov = MlMovies.objects.get(movieid=a)
        print(a, mov.moviename, mov.genres, b)
    feedback = {}
    for ea, s in result[:till_which]:
        print("Enter feedback for: " + str(ea) + "...Hit X to exit")
        feed = input()
        if feed == 'X':
            breakFlag = False
        else:
            feedback[ea] = int(feed)
    movie_vector = getRelevance(feedback)
    for k, v in movie_vector.items():
        if v == 0.0:
            v = 0.0001
        user_movie_dict[k] *= v
    user_movie_list = list(user_movie_dict.items())
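# A hedged, model-free sketch of the scoring rule used above: each movie's
# score is the sum over the rating axis of (rating value) x (reconstructed
# weight), keeping positive entries only. `score_movies` and `user_slice`
# are illustrative names; the toy array stands in for `recons[index]`.
import numpy as np

def score_movies(user_slice):
    """user_slice: (movies x ratings) slice of the reconstructed tensor."""
    n_movies, n_ratings = user_slice.shape
    rating_values = np.arange(1, n_ratings + 1)  # rating slot r -> value r + 1
    # Zero out negative reconstruction noise, then weight by rating value.
    return user_slice.clip(min=0.0) @ rating_values

user_slice = np.random.RandomState(1).rand(4, 5)  # toy stand-in
print(score_movies(user_slice))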
def compute_Semantics_2d():
    """Tensor decomposition on the tag-movie-rating tensor; put tags, movies
    and ratings into non-overlapping bins of latent semantics."""
    print("\n\n")

    # Map tensor indices to the distinct tags, ratings and movies.
    tag_dict = {}
    taglist = Task7.objects.values_list('tagid', flat=True).distinct()
    tag_count = taglist.count()
    for n, each in enumerate(taglist):
        tag_dict[n] = each

    rating_dict = {}
    rate = Task7.objects.values_list('rating', flat=True).distinct()
    rating_count = rate.count()
    for n, each in enumerate(rate):
        rating_dict[n] = each

    movie_dict = {}
    mov = Task7.objects.values_list('movieid', flat=True).distinct()
    movie_count = mov.count()
    for n, each in enumerate(mov):
        movie_dict[n] = each

    # Human-readable names for the printed bins.
    tagobjs = GenomeTags.objects.values_list('tagid', 'tag')
    tag_mapping = {x[0]: x[1] for x in tagobjs}
    movieobjs = MlMovies.objects.values_list('movieid', 'moviename')
    movie_mapping = {x[0]: x[1] for x in movieobjs}

    # Build the binary tag x movie x rating occurrence tensor.
    tags = Task7.objects.values_list('tagid', 'movieid', 'rating')
    results = [[[0] * rating_count for _ in range(movie_count)]
               for _ in range(tag_count)]
    inv_t = {v: k for k, v in tag_dict.items()}
    inv_m = {v: k for k, v in movie_dict.items()}
    inv_r = {v: k for k, v in rating_dict.items()}
    for row in tags:
        results[inv_t[row[0]]][inv_m[row[1]]][inv_r[row[2]]] = 1.0

    tensor = T.tensor(np.array(results))
    # Rank-5 CP (PARAFAC) decomposition.
    factors = tensorly.decomposition.parafac(tensor, 5)

    def print_bins(factor, label_for_index, header):
        """Assign each row of `factor` to its strongest latent semantic;
        the reported strength is the max loading normalised by the row sum."""
        bins = [[] for _ in range(5)]
        for i, row in enumerate(factor):
            bins[np.argmax(row)].append(
                [label_for_index(i), np.max(row) / np.sum(row)])
        print(header)
        for b, members in enumerate(bins):
            print("LATENT SEMANTIC %d" % (b + 1))
            for entry in sorted(members, key=lambda e: e[1], reverse=True):
                print(entry)

    # TAG SEMANTICS: column-wise min-max scaling of the mode-0 factor.
    x = factors[0].asnumpy()
    factors[0] = (x - x.min(0)) / (x.max(0) - x.min(0))
    print_bins(factors[0], lambda i: tag_mapping[tag_dict[i]], "\nTag Bins")

    # MOVIE SEMANTICS (mode 1).
    x = factors[1].asnumpy()
    factors[1] = (x - x.min(0)) / (x.max(0) - x.min(0))
    print_bins(factors[1], lambda i: movie_mapping[movie_dict[i]],
               "\nMovie Bins")

    # RATING SEMANTICS (mode 2).
    x = factors[2].asnumpy()
    factors[2] = (x - x.min(0)) / (x.max(0) - x.min(0))
    print_bins(factors[2], lambda i: rating_dict[i], "\nRating Bins")
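# The explicit (max - min) denominator used in compute_Semantics_2d is the
# same column-wise min-max scaling as the ptp(0) form used elsewhere in this
# module; a quick self-check, assuming plain numpy:
import numpy as np

x = np.random.RandomState(0).rand(4, 5)
a = (x - x.min(0)) / x.ptp(0)
b = (x - x.min(0)) / (x.max(0) - x.min(0))
assert np.allclose(a, b)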
import matplotlib.pyplot as plt

from tensorly.datasets.synthetic import gen_image
from tensorly.random import check_random_state
from tensorly.regression.kruskal_regression import KruskalRegressor
import tensorly.backend as T

# Parameters of the experiment
image_height = 25
image_width = 25
# shapes of the weight images
patterns = ['rectangle', 'swiss', 'circle']
# ranks to test
ranks = [1, 2, 3, 4, 5]

# Generate random samples
rng = check_random_state(1)
X = T.tensor(rng.normal(size=(1000, image_height, image_width),
                        loc=0, scale=1))

# Parameters of the plot, deduced from the data
n_rows = len(patterns)
n_columns = len(ranks) + 1

# Plot the three original weight images
fig = plt.figure()
for i, pattern in enumerate(patterns):
    # Generate the original image
    weight_img = gen_image(region=pattern, image_height=image_height,
                           image_width=image_width)
    weight_img = T.tensor(weight_img)
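    # The snippet cuts off here. A hedged sketch of the usual continuation,
    # following the tensorly gallery example this is patterned on (the extra
    # imports and parameter values below are assumptions, not original code):
    #
    # from tensorly.base import tensor_to_vec, partial_tensor_to_vec
    # y = T.dot(partial_tensor_to_vec(X, skip_begin=1),
    #           tensor_to_vec(weight_img))
    # for j, rank in enumerate(ranks):
    #     estimator = KruskalRegressor(weight_rank=rank, tol=10e-7,
    #                                  n_iter_max=100, verbose=0)
    #     estimator.fit(X, y)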
# -*- coding: utf-8 -*-
"""
Basic tensor operations
=======================

Example on how to use :mod:`tensorly.base` to perform basic tensor
operations.
"""
import matplotlib.pyplot as plt
from tensorly.base import unfold, fold
import numpy as np
import tensorly.backend as T

###########################################################################
# A tensor is simply a numpy array
tensor = T.tensor(np.arange(24).reshape((3, 4, 2)))
print('* original tensor:\n{}'.format(tensor))

###########################################################################
# Unfolding a tensor is easy
for mode in range(tensor.ndim):
    print('* mode-{} unfolding:\n{}'.format(mode, unfold(tensor, mode)))

###########################################################################
# Re-folding the tensor is as easy:
for mode in range(tensor.ndim):
    unfolding = unfold(tensor, mode)
    folded = fold(unfolding, mode, tensor.shape)
    T.assert_array_equal(folded, tensor)
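###########################################################################
# A quick shape check on the example above: mode-n unfolding keeps mode n as
# the rows and flattens the remaining modes into the columns, so the 3x4x2
# tensor unfolds to (3, 8), (4, 6) and (2, 12).
for mode, expected in zip(range(3), [(3, 8), (4, 6), (2, 12)]):
    assert unfold(tensor, mode).shape == expected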
import numpy as np
import matplotlib.pyplot as plt
import tensorly.backend as T
from tensorly import kruskal_to_tensor, tucker_to_tensor
from tensorly.decomposition import parafac, tucker
from tensorly.random import check_random_state
# `tensor_distance` (reconstruction error between two tensors) is assumed to
# be defined elsewhere in this module.


def decomp_plot(edge_len=25,
                iterations=[1, 2, 3, 4],
                ranks=[1, 5, 25, 50, 125, 130, 150, 200],
                decomp='CP'):
    print(ranks)

    # Generate random samples
    rng = check_random_state(7)
    X = T.tensor(rng.normal(size=(1000, edge_len, edge_len), loc=0, scale=1))

    # For plotting
    n_rows = len(iterations)
    n_columns = len(ranks) + 1
    fig = plt.figure()

    for i, _ in enumerate(iterations):
        # Slice out one edge_len x edge_len x edge_len tensor
        weight_img = X[i * edge_len:(i + 1) * edge_len, :, :]
        ax = fig.add_subplot(n_rows, n_columns, i * n_columns + 1)
        # Plot the image corresponding to the 3-D tensor (summed over mode 0)
        ax.imshow(T.to_numpy(np.sum(weight_img, axis=0)),
                  cmap=plt.cm.OrRd, interpolation='nearest')
        ax.set_axis_off()
        if i == 0:
            ax.set_title('Original')

        for j, rank in enumerate(ranks):
            # Decompose into image_edge x rank factors (25x1, 25x5, 25x25 ...)
            if decomp == 'CP':
                # CP decomposition: a list of factor matrices
                components = parafac(weight_img, rank=rank)
                reconstruction = kruskal_to_tensor(components)
            else:
                # Tucker decomposition: a core tensor plus factor matrices
                core, tucker_factors = tucker(weight_img, ranks=[3, 25, rank])
                components = core
                # kruskal_to_tensor does not apply to a Tucker core; use the
                # matching Tucker reconstruction instead.
                reconstruction = tucker_to_tensor(core, tucker_factors)
            ax = fig.add_subplot(n_rows, n_columns, i * n_columns + j + 2)
            # Aggregate the factors (or core slices) for visualisation
            simg = sum(components[k] for k in range(len(components)))
            ax.imshow(T.to_numpy(simg), cmap=plt.cm.OrRd,
                      interpolation='nearest')
            ax.text(.5, 2.0,
                    '{:.2f}'.format(tensor_distance(reconstruction,
                                                    weight_img)),
                    color='r')
            ax.set_axis_off()
            if i == 0:
                ax.set_title('\n{}'.format(rank))

    plt.suptitle('Tensor Decompositions')
    plt.show()
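# A short usage sketch for decomp_plot (a display is required). The smaller
# ranks list is illustrative: it keeps the toy run fast and keeps every rank
# at or below 25 so the Tucker branch's mode-2 rank stays within the tensor's
# dimensions; any value other than 'CP' selects the Tucker branch.
if __name__ == '__main__':
    decomp_plot(edge_len=25, iterations=[1, 2], ranks=[1, 5, 25], decomp='CP')
    decomp_plot(edge_len=25, iterations=[1, 2], ranks=[1, 5, 25],
                decomp='Tucker')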
import time
from math import ceil

from scipy.misc import face, imresize
import tensorly as tl
import tensorly.backend as T
from tensorly.base import tensor_to_vec, partial_tensor_to_vec
from tensorly.datasets.synthetic import gen_image
from tensorly.decomposition import non_negative_parafac
from tensorly.decomposition import non_negative_tucker
from tensorly.decomposition import tucker
from tensorly.random import check_random_state
from tensorly.regression.kruskal_regression import KruskalRegressor

# Time a small non-negative Tucker decomposition of a large random tensor
# (1000^3 float64 values is roughly 8 GB) on each backend.
tl.set_backend('numpy')
rng = check_random_state(1)
X = T.tensor(rng.normal(size=(1000, 1000, 1000), loc=0, scale=1))
start_time = time.time()
core, tucker_factors = non_negative_tucker(X, rank=[10, 10, 10], init='svd',
                                           tol=10e-12, verbose=True,
                                           n_iter_max=2)
print("--- %s seconds ---" % (time.time() - start_time))

tl.set_backend('mxnet')
rng = check_random_state(1)
X = T.tensor(rng.normal(size=(1000, 1000, 1000), loc=0, scale=1))
start_time = time.time()
core, tucker_factors = non_negative_tucker(X, rank=[10, 10, 10], init='svd',
                                           tol=10e-12, verbose=True,
                                           n_iter_max=2)
print("--- %s seconds ---" % (time.time() - start_time))

tl.set_backend('pytorch')
rng = check_random_state(1)
X = T.tensor(rng.normal(size=(1000, 1000, 1000), loc=0, scale=1))
start_time = time.time()
core, tucker_factors = non_negative_tucker(X, rank=[10, 10, 10], init='svd',
                                           tol=10e-12, verbose=True,
                                           n_iter_max=2)
print("--- %s seconds ---" % (time.time() - start_time))
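# The three timed runs above repeat one pattern; a compact equivalent loops
# over the backends. A hedged sketch: it assumes all three backends are
# installed, and it re-creates the tensor under each backend so conversion
# cost is included in the timing.
import time
import tensorly as tl
import tensorly.backend as T
from tensorly.decomposition import non_negative_tucker
from tensorly.random import check_random_state

for backend in ('numpy', 'mxnet', 'pytorch'):
    tl.set_backend(backend)
    rng = check_random_state(1)
    X = T.tensor(rng.normal(size=(1000, 1000, 1000), loc=0, scale=1))
    start_time = time.time()
    non_negative_tucker(X, rank=[10, 10, 10], init='svd', tol=10e-12,
                        verbose=True, n_iter_max=2)
    print("--- %s: %s seconds ---" % (backend, time.time() - start_time))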