import sys

from sklearn.decomposition import TruncatedSVD

import util  # project-local helpers; output_file is a module-level path


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4

    genome_tags = util.read_genome_tags()
    tf_idf_matrix = util.get_tf_idf_matrix(genre)
    tagid_list = list(tf_idf_matrix.columns.values)
    tag_list = genome_tags[genome_tags['tagId'].isin(tagid_list)]['tag'].tolist()

    svd = TruncatedSVD(n_components=no_of_components, n_iter=100, random_state=None)
    svd.fit(tf_idf_matrix)

    # Pair every tag with its weight in each latent concept and sort by
    # absolute weight so the most influential tags come first.
    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(svd.components_[i]):
            concept.append((tag_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(genre, concepts)
    util.write_output_file(genre, concepts, output_file)

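
# A minimal, self-contained sketch of the decomposition step above (toy data
# and hypothetical tag names, not project data): TruncatedSVD exposes one row
# per latent concept in components_, and each entry in that row is the weight
# of the corresponding input column (tag) in the concept.

import numpy as np
from sklearn.decomposition import TruncatedSVD

toy_matrix = np.array([[0.9, 0.1, 0.0],
                       [0.8, 0.2, 0.1],
                       [0.0, 0.7, 0.9]])  # 3 movies x 3 tags (toy tf-idf)
toy_tags = ['noir', 'heist', 'romance']

toy_svd = TruncatedSVD(n_components=2)
toy_svd.fit(toy_matrix)
for i, component_row in enumerate(toy_svd.components_):
    # Rank tags by absolute weight, exactly as the task code does.
    ranked = sorted(zip(toy_tags, component_row), key=lambda t: abs(t[1]), reverse=True)
    print('concept', i, ranked)
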
import sys

from sklearn.decomposition import PCA

import util  # project-local helpers; output_file is a module-level path


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4

    imdb_actor_info = util.read_imdb_actor_info()
    tf_idf_matrix = util.get_tf_idf_matrix(genre)
    actorid_list = list(tf_idf_matrix.columns.values)
    actor_list = imdb_actor_info[imdb_actor_info['id'].isin(actorid_list)]['name'].tolist()

    pca = PCA(n_components=no_of_components)
    pca.fit(tf_idf_matrix)

    # Pair every actor with its weight in each principal component and sort
    # by absolute weight.
    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(pca.components_[i]):
            concept.append((actor_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(genre, concepts)
    util.write_output_file(genre, concepts, output_file)

import sys

import pandas as pd

import util  # project-local helpers; no_of_actors and output_file are module-level


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    movieid = int(sys.argv[1])

    mlmovies = util.read_mlmovies()
    movie_actors = util.read_movie_actor()
    imdb_actor_info = util.read_imdb_actor_info()

    input_movie = mlmovies[mlmovies['movieid'] == movieid]['moviename'].values[0]
    actors_of_movie = movie_actors.where(
        movie_actors['movieid'] == movieid).dropna().loc[:, 'actorid'].unique()

    movie_matrix = util.get_movie_tf_idf_matrix()
    actor_matrix = util.get_actor_tf_idf_matrix()

    # The dot product of every actor's tag vector with the movie's tag vector
    # gives one similarity score per actor; actors already cast in the movie
    # are dropped from the ranking.
    input_movie_vector = pd.DataFrame(movie_matrix.loc[movieid])
    similarity_matrix = actor_matrix.dot(input_movie_vector)
    similarity_matrix = similarity_matrix[~similarity_matrix.index.isin(actors_of_movie)]

    actors = []
    for index, row in similarity_matrix.iterrows():
        actor_name = imdb_actor_info[imdb_actor_info['id'] == index]['name'].values[0]
        actors.append((index, actor_name, similarity_matrix.loc[index][movieid]))
    actors.sort(key=lambda tup: tup[2], reverse=True)

    util.print_output(movieid, input_movie, actors[:no_of_actors])
    util.write_output_file(movieid, input_movie, actors[:no_of_actors], output_file)

import sys

from scipy.spatial.distance import cosine

import util  # project-local helpers; no_of_actors and output_file are module-level


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])

    imdb_actor_info = util.read_imdb_actor_info()
    input_actor = imdb_actor_info[imdb_actor_info['id'] == actorid]['name'].values[0]
    tf_idf_matrix = util.get_tf_idf_matrix()
    input_actor_tf_idf = tf_idf_matrix.loc[actorid]

    # Cosine similarity (1 - cosine distance) between the input actor's
    # tf-idf vector and every other actor's vector.
    actors = []
    for index, row in tf_idf_matrix.iterrows():
        actor_name = imdb_actor_info[imdb_actor_info['id'] == index]['name'].values[0]
        actors.append((index, actor_name, 1 - cosine(row, input_actor_tf_idf)))

    other_actors = list(filter(lambda tup: tup[0] != actorid, actors))
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    util.print_output(actorid, input_actor, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor, other_actors[:no_of_actors], output_file)

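
# Quick illustration of the similarity measure used above (toy vectors, not
# project data): scipy's cosine() returns a *distance*, so 1 - cosine(u, v)
# is the cosine similarity: 1.0 for vectors pointing the same way, 0.0 for
# orthogonal ones.

import numpy as np
from scipy.spatial.distance import cosine

u = np.array([1.0, 2.0, 0.0])
print(1 - cosine(u, 2 * u))                      # 1.0: same direction
print(1 - cosine(u, np.array([0.0, 0.0, 3.0])))  # 0.0: orthogonal
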
import sys

import pandas as pd
from scipy.spatial.distance import cosine
from sklearn.decomposition import TruncatedSVD as SVD

import util  # project-local helpers; no_of_components, no_of_actors and output_file are module-level


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])

    imdb_actor_info = util.read_imdb_actor_info()
    input_actor_name = imdb_actor_info[imdb_actor_info['id'] == actorid]['name'].values[0]
    tf_idf_matrix = util.get_tf_idf_matrix()

    # Project all actors into the latent space, then compare each of them to
    # the input actor with cosine similarity in that space.
    svd = SVD(n_components=no_of_components)
    svd.fit(tf_idf_matrix)
    svd_df = pd.DataFrame(svd.transform(tf_idf_matrix), index=tf_idf_matrix.index)
    input_actor_row = svd_df.loc[actorid]

    actors = []
    for index, row in svd_df.iterrows():
        name = imdb_actor_info[imdb_actor_info['id'] == index]['name'].values[0]
        actors.append((index, name, 1 - cosine(row, input_actor_row)))

    other_actors = list(filter(lambda tup: tup[0] != actorid, actors))
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    util.print_output(actorid, input_actor_name, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor_name, other_actors[:no_of_actors], output_file)

import numpy as np
import pandas as pd

import util  # project-local helpers; no_of_components and output_file are module-level


def latent_rating_semantics(rating_matrix):
    mlratings = util.read_mlratings()
    ratings_list = mlratings.rating.unique()

    # Each column of rating_matrix is a latent concept; pair every rating
    # value with its weight in the concept and sort by absolute weight.
    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(np.transpose(rating_matrix)[i]):
            concept.append((ratings_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(concepts, 'Rating')
    util.write_output_file(concepts, output_file, 'Rating')

def latent_year_semantics(year_matrix):
    mlmovies = util.read_mlmovies()
    year_list = mlmovies.year.unique()

    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(np.transpose(year_matrix)[i]):
            concept.append((year_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(concepts, 'Year')
    util.write_output_file(concepts, output_file, 'Year')

import json
import random

from cryptography.fernet import Fernet

import ot    # project-local oblivious-transfer helpers
import util  # project-local helpers
import yao   # project-local circuit representation


def alice(filename):
    socket = util.ClientSocket()
    with open(filename) as json_file:
        json_circuits = json.load(json_file)
    print()

    for json_circuit in json_circuits['circuits']:
        # Parse and fill circuit
        circuit = yao.Circuit()
        circuit.parseJson(json_circuit)
        print("======= " + circuit.Name + " =======")

        # Create random p values
        p_values = {}
        for wire in circuit.Wires:
            p_values[wire] = random.randint(0, 1)

        # Generate a pair of keys (for wire values 0 and 1) for each wire
        keys = {}
        for wire in circuit.Wires:
            keys[wire] = (Fernet.generate_key(), Fernet.generate_key())

        # Create garbled tables
        create_garble_tables(circuit, p_values, keys)

        # Generate value and key for all possible Bob values for the circuit
        all_bob_values = ot.generate_all_bob_values(circuit.Bob, p_values, keys)

        # Send all combinations of Alice's inputs to Bob
        for Alice_values in util.create_all_combination(len(circuit.Alice)):
            Alice_pvalues = list(map(lambda x, y: x ^ p_values[y], Alice_values, circuit.Alice))
            Alice_pvalues = list(map(lambda x, y: (x, keys[y][x]), Alice_pvalues, circuit.Alice))
            output_pvalues = list(map(lambda x: p_values[x], circuit.Outputs))
            socket.send_wait((circuit, Alice_pvalues, output_pvalues))

            # Check whether Bob has any input wires
            if not len(circuit.Bob):
                # Get output from Bob
                output = socket.send_wait("Get output")
                util.print_output(circuit, Alice_values, [], output)
            else:
                for Bob_values in util.create_all_combination(len(circuit.Bob)):
                    # Send Bob's p values via oblivious transfer
                    ot.send_bob_values(circuit.Bob, all_bob_values, socket)
                    # Get output from Bob
                    output = socket.send_wait("Get output")
                    util.print_output(circuit, Alice_values, Bob_values, output)
        print()

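
# Sketch of the primitive the garbled tables above rely on (hypothetical wire
# keys and payload, not the project's actual table layout): each wire value
# owns a Fernet key, a table entry is encrypted under one of them, and only
# the holder of the matching key can decrypt it.

from cryptography.fernet import Fernet

key0, key1 = Fernet.generate_key(), Fernet.generate_key()  # keys for wire values 0/1
token = Fernet(key0).encrypt(b'table entry')               # entry locked under key0
print(Fernet(key0).decrypt(token))                         # b'table entry'
# Fernet(key1).decrypt(token) would raise InvalidToken: the wrong key learns nothing.
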
def latent_movie_semantics(movie_matrix):
    mlmovies = util.read_mlmovies()
    movies_list = mlmovies.movieid.unique()
    movies_list = mlmovies[mlmovies['movieid'].isin(movies_list)]['moviename'].tolist()

    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(np.transpose(movie_matrix)[i]):
            concept.append((movies_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(concepts, 'Movie')
    util.write_output_file(concepts, output_file, 'Movie')

def latent_actor_semantics(actor_matrix):
    imdb_actor_info = util.read_imdb_actor_info()
    actor_list = imdb_actor_info.id.unique()
    actor_list = imdb_actor_info[imdb_actor_info['id'].isin(actor_list)]['name'].tolist()

    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(np.transpose(actor_matrix)[i]):
            concept.append((actor_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(concepts, 'Actor')
    util.write_output_file(concepts, output_file, 'Actor')

def local_test(filename):
    with open(filename) as json_file:
        json_circuits = json.load(json_file)
    print()

    for json_circuit in json_circuits['circuits']:
        circuit = yao.Circuit()
        circuit.parseJson(json_circuit)
        print("======= " + circuit.Name + " =======")

        # Create random p values
        p_values = {}
        for wire in circuit.Wires:
            p_values[wire] = random.randint(0, 1)

        # Generate keys for each wire
        keys = {}
        for wire in circuit.Wires:
            keys[wire] = (Fernet.generate_key(), Fernet.generate_key())

        # Create garbled tables
        create_garble_tables(circuit, p_values, keys)

        # Try evaluating the circuit locally for every input combination
        for Alice_values in util.create_all_combination(len(circuit.Alice)):
            Alice_pval = list(map(lambda x, y: x ^ p_values[y], Alice_values, circuit.Alice))
            Alice_pval = list(map(lambda x, y: (x, keys[y][x]), Alice_pval, circuit.Alice))
            output_pval = list(map(lambda x: p_values[x], circuit.Outputs))

            if not len(circuit.Bob):
                outputs = evaluate(Alice_pval, [], circuit, output_pval)
                util.print_output(circuit, Alice_values, [], outputs)

            for Bob_values in util.create_all_combination(len(circuit.Bob)):
                Bob_pval = list(map(lambda x, y: x ^ p_values[y], Bob_values, circuit.Bob))
                Bob_pval = list(map(lambda x, y: (x, keys[y][x]), Bob_pval, circuit.Bob))
                outputs = evaluate(Alice_pval, Bob_pval, circuit, output_pval)
                # Write output
                util.print_output(circuit, Alice_values, Bob_values, outputs)
        print()

def latent_tag_semantics(tag_matrix):
    mltags = util.read_mltags()
    genome_tags = util.read_genome_tags()
    # Attach the readable tag text to each tagging record.
    mltags = pd.merge(mltags, genome_tags, left_on='tagid', right_on='tagId', how='inner')
    tags_list = mltags.tagid.unique()
    tags_list = mltags[mltags['tagid'].isin(tags_list)]['tag'].tolist()

    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(np.transpose(tag_matrix)[i]):
            concept.append((tags_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(concepts, 'Tag')
    util.write_output_file(concepts, output_file, 'Tag')

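
# Small reminder of the join used above (toy frames with hypothetical values):
# an inner merge on differently named key columns keeps only tag ids present
# in both frames and attaches the readable tag text to each row.

import pandas as pd

left = pd.DataFrame({'tagid': [1, 2, 3]})
right = pd.DataFrame({'tagId': [2, 3, 4], 'tag': ['dark', 'funny', 'slow']})
print(pd.merge(left, right, left_on='tagid', right_on='tagId', how='inner'))
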
import logging
import multiprocessing

import util  # project-local helpers


def monitor(thread_util, sub_tasks):
    unique_id = util.short_unique_id()
    exit_event = multiprocessing.Event()
    waiting_tasks = []
    logging.debug('Manager - Starting ID [%s].', str(unique_id))

    while not exit_event.is_set() or not thread_util.is_kill_event_set():
        non_finished_tasks = []
        if sub_tasks:
            for (sub_task, process) in sub_tasks:
                return_code = process.poll()
                if return_code is not None:
                    # Process finished.
                    thread_util.remove_process(sub_task.get_parent().id, process.pid)
                    (std_out, std_err) = process.communicate()
                    if return_code == 0:
                        # Sub-task finished successfully.
                        logging.info('Manager - FINISHED - Task [%s], SubTask [%s].',
                                     str(sub_task.get_parent().name), str(sub_task.id))
                        util.print_output(std_err, std_out)
                        if sub_task.get_parent().wait:
                            # Should we wait for the others to finish?
                            logging.info('Manager - Waiting for other Tasks to finish.')
                            waiting_tasks.append((sub_task, process))
                            continue
                        else:
                            # Good to go.
                            if sub_task.get_parent().has_children():
                                logging.info('Manager - No need to wait for other processes to finish.')
                                for (s, p) in sub_tasks:
                                    util.kill_process(p.pid)
                                    thread_util.remove_process(s.get_parent().id, p.pid)
                                logging.info('Manager - Task has Children. Sending Tasks to Processing Queue.')
                                for task in sub_task.get_parent().get_children():
                                    thread_util.add_task(task)
                                exit_event.set()
                                break
                            else:
                                util.print_task_tree(sub_task.get_parent())
                                logging.info('Manager - Job Finished with success.')
                                exit_event.set()
                                thread_util.kill(0)
                                exit(0)
                    else:
                        # Failed sub-tasks go here.
                        logging.info('Manager - FINISHED - Task Failure [%s], SubTask [%s].',
                                     str(sub_task.get_parent().name), str(sub_task.id))
                        util.print_output(std_err, std_out)
                        if sub_task.get_parent().fail_tolerant:
                            logging.info('Manager - The Task is Fail Tolerant.')
                            if thread_util.has_running_processes(sub_task.get_parent().id):
                                # Should we wait for the others to finish?
                                logging.info('Manager - Waiting for other Tasks to finish.')
                                continue
                            else:
                                # Good to go.
                                if sub_task.get_parent().has_children():
                                    logging.info('Manager - No need to wait for other processes to finish.')
                                    for (s, p) in sub_tasks:
                                        util.kill_process(p.pid)
                                        thread_util.remove_process(s.get_parent().id, p.pid)
                                    logging.info('Manager - Task has Children. Sending Tasks to Processing Queue.')
                                    for task in sub_task.get_parent().get_children():
                                        thread_util.add_task(task)
                                    exit_event.set()
                                else:
                                    util.print_task_tree(sub_task.get_parent())
                                    logging.info('Manager - Job Finished with success, but Fail Tolerance has been applied.')
                                    exit_event.set()
                                    thread_util.kill(0)
                                    exit(0)
                        elif sub_task.get_parent().wait:
                            # This task failed while waiting on the output of another
                            # task at the same level; it most likely cannot recover
                            # from here on, so kill the Job now.
                            logging.info('Manager - Job Finished with errors.')
                            exit_event.set()
                            thread_util.kill(1)
                            exit(1)
                        else:
                            # We cannot proceed to the children tasks because this one
                            # failed; check whether the Job still has tasks running.
                            if thread_util.has_running_processes(sub_task.get_parent().id):
                                # Other tasks at the same level are still running.
                                continue
                            else:
                                # We were waiting for this one to complete;
                                # better to kill the Job now.
                                logging.info('Manager - Job Finished with errors.')
                                exit_event.set()
                                thread_util.kill(2)
                                exit(2)
                else:
                    non_finished_tasks.append((sub_task, process))
        else:
            # Are there tasks waiting for others to finish?
            if waiting_tasks:
                for (sub_task, process) in waiting_tasks:
                    if sub_task.get_parent().has_children():
                        logging.info('Manager - Task has Children. Sending Tasks to self.manager.')
                        thread_util.add_task(sub_task.get_parent().get_children())
                        exit_event.set()
                        break
                    else:
                        util.print_task_tree(sub_task.get_parent())
                        logging.info('Manager - Job Finished with success.')
                        exit_event.set()
                        thread_util.kill(0)
                        exit(0)
            else:
                exit_event.set()
                break
        # Remove elements that were already processed.
        sub_tasks = non_finished_tasks

    logging.debug('Manager - Finished ID [%s].', str(unique_id))
    return

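
# The monitor loop above hinges on Popen.poll() semantics; a standalone sketch
# using only the stdlib (no project code, POSIX 'echo'): poll() returns None
# while the child is still running and its return code once it has exited, and
# communicate() then collects the captured output.

import subprocess

proc = subprocess.Popen(['echo', 'done'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
while proc.poll() is None:
    pass  # still running; the real monitor does useful work here instead
std_out, std_err = proc.communicate()
print(proc.returncode, std_out)  # 0 b'done\n'
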
import random

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter  # or tensorboardX

# DataLoader, Encoder, Decoder, Sample, step, evaluate, load_weights,
# save_weights, print_output and device are project-local definitions.


def main(args):
    data = DataLoader(pca=args.PCA, norm=args.norm)
    train_captions, train_feature, train_url, train_len = data.get_Training_data(args.training)
    test_captions, test_feature, test_url, test_len = data.get_val_data(args.testing)
    f, c, _ = data.eval_data()

    writer = SummaryWriter()

    encoder = Encoder(input_size=train_feature.shape[1],
                      hidden_size=args.hidden_size).to(device)
    decoder = Decoder(embed_size=args.embed_size,
                      hidden_size=args.hidden_size,
                      attention_dim=args.attention_size,
                      vocab_size=len(data.word_to_idx)).to(device)

    if args.load_weight:
        load_weights(encoder, args.model_path + "Jul28_10-04-57encoder")
        load_weights(decoder, args.model_path + "Jul28_10-04-57decoder")

    for epoch in range(args.num_epochs):
        params = list(decoder.parameters()) + list(encoder.parameters())
        criterion = nn.CrossEntropyLoss()
        # The optimizer is rebuilt every epoch so the decayed learning rate
        # (see the end of the loop body) takes effect.
        optimizer = torch.optim.Adam(params=params, lr=args.learning_rate)

        training_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(train_captions, train_feature, train_len),
                             optimizer=optimizer)

        with torch.no_grad():
            test_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(test_captions, test_feature, test_len))
            b1, b2, b3, b4 = evaluate(encoder, decoder, f, c, 5,
                                      data.word_to_idx, data.idx_to_word)
            writer.add_scalars('BLEU', {
                'BLEU1': b1,
                'BLEU2': b2,
                'BLEU3': b3,
                'BLEU4': b4
            }, epoch + 1)

        # Checkpoint every 30 epochs.
        if (epoch % 30) == 0:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))

        writer.add_scalars('loss', {
            'train': training_loss,
            'val': test_loss
        }, epoch + 1)
        print('Epoch [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}, '
              'TestLoss: {:.4f}, TestPerplexity: {:5.4f}'.format(
                  epoch + 1, args.num_epochs, training_loss, np.exp(training_loss),
                  test_loss, np.exp(test_loss)))
        args.learning_rate *= 0.995

        if args.save_weight:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))

    if args.save_weight:
        save_weights(encoder, args.model_path + "encoder")
        save_weights(decoder, args.model_path + "decoder")

    if args.predict:
        sample = Sample(encoder=encoder, decoder=decoder, device=device)
        train_mask = [random.randint(0, train_captions.shape[0] - 1)
                      for _ in range(args.numOfpredection)]
        test_mask = [random.randint(0, test_captions.shape[0] - 1)
                     for _ in range(args.numOfpredection)]

        train_features = torch.from_numpy(train_feature[train_mask]).to(device)
        train_encoder_out = encoder(train_features)
        test_features = torch.from_numpy(test_feature[test_mask]).to(device)
        test_encoder_out = encoder(test_features)

        train_output = []
        test_output = []
        for i in range(len(test_mask)):
            print(i)
            pre = sample.caption_image_beam_search(
                train_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 2)
            train_output.append(pre)
            pre = sample.caption_image_beam_search(
                test_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 50)
            test_output.append(pre)

        print_output(output=test_output, sample=0, gt=test_captions[test_mask],
                     img=test_url[test_mask], title="val",
                     show_image=args.show_image, idx_to_word=data.idx_to_word)
        print_output(output=train_output, sample=0, gt=train_captions[train_mask],
                     img=train_url[train_mask], title="training",
                     show_image=args.show_image, idx_to_word=data.idx_to_word)

from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

import util  # project-local helpers

# cc_matrix, actor_list, task, no_of_components and output_file are defined at
# module level; the truncated SVD constructor matches the other tasks' usage.
svd = TruncatedSVD(n_components=no_of_components, n_iter=100,
                   random_state=None)
svd.fit(cc_matrix)

# LSA pipeline: reduce with SVD, then L2-normalize the reduced vectors.
normalizer = Normalizer(copy=False)
lsa = make_pipeline(svd, normalizer)
dataC = lsa.fit_transform(cc_matrix)

concepts = []
for i in range(no_of_components):
    concept = []
    for j, component in enumerate(svd.components_[i]):
        concept.append((actor_list[j], component))
    concept.sort(key=lambda tup: tup[1], reverse=True)
    concepts.append(concept)

util.print_output(task, concepts)
util.write_output_file(task, concepts, output_file)

# Cluster the actors in the reduced LSA space into 3 groups.
cluster_rule = KMeans(n_clusters=3)
cluster_rule.fit(dataC)
labels = cluster_rule.predict(dataC)
centroids = cluster_rule.cluster_centers_

print('\n')
print("Centroids of 3 new clusters: \n")
print(centroids)
print('\n')
print("Clustered actors into the 3 groups (0 : 1 : 2) \n")
for i, j in zip(actor_list, labels):
    # Print each actor together with its assigned cluster label.
    print(i, ':', j)
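
# Toy illustration of the clustering step above (synthetic 2-D points, not the
# LSA-reduced actor data): KMeans assigns each point a label in {0, 1, 2} and
# exposes one centroid per cluster, exactly the two artifacts printed above.

import numpy as np
from sklearn.cluster import KMeans

points = np.array([[0.0, 0.0], [0.2, 0.1],
                   [5.0, 5.0], [5.1, 4.9],
                   [9.0, 0.0], [8.8, 0.3]])
km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(points)
print(km.labels_)           # cluster index for each point
print(km.cluster_centers_)  # one centroid per cluster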