def use_case_kmeans(G, users_skills, clusters_ground_truth):
    clustering_range = (2, 10)
    distance_function = "euclidean"

    print("Clustering")
    print("Using KMeans")
    clustering_model = clustering(users_skills, range(*clustering_range), True)
    print("- Number of clusters found", len(clustering_model.cluster_centers_))
    print("- Real number of clusters", len(np.unique(clusters_ground_truth)))

    users_distances_to_centers = cdist(
        users_skills, clustering_model.cluster_centers_, metric=distance_function)

    print("Link prediction")
    model, y_train, predicted_train, y_test, predicted_test = link_prediction(
        G, users_distances_to_centers)

    print("Evaluation")
    print("- Train")
    print_evaluate(y_train, predicted_train)
    print("- Test")
    print_evaluate(y_test, predicted_test)

    print("Visualization")
    visualization(model, G, users_distances_to_centers, clustering_model.labels_)
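# A minimal standalone illustration (not part of the pipeline above, variable names are
# made up) of the feature matrix that cdist produces: one row per user and one column per
# cluster center, so the link-prediction model sees each user's distance profile to every
# cluster.
import numpy as np
from scipy.spatial.distance import cdist

users = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])  # 3 users, 2 skills
centers = np.array([[1.0, 0.0], [0.0, 1.0]])            # 2 cluster centers
print(cdist(users, centers, metric="euclidean").shape)   # (3, 2)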
def embedded_query_expansion_qi(self, interpolated_aplpha, m):
    query_embedded = self.query_embedded
    query_wordcount = self.query_wordcount
    collection = self.collection
    collection_total_similarity = self.collection_total_similarity
    word2vec = self.word2vec

    # copy query model
    query_model = Pickle.load(open("model/query_model.pkl", "rb"))
    embedded_query_expansion = query_model

    update_embedded_query_expansion = {}
    if os.path.isfile("model/update_embedded_query_expansion_qi.pkl"):
        # the expansion file already exists, reuse it
        update_embedded_query_expansion = Pickle.load(
            open("model/update_embedded_query_expansion_qi.pkl", "rb"))
    else:
        # calculate every query
        for query, query_word_count_dict in query_wordcount.items():
            top_prob_dict = {}
            # calculate every word in collection
            for word in collection.keys():
                query_length = ProcDoc.word_sum(query_word_count_dict) * 1.0
                # p(w|q)
                p_w_q = 0
                if not word in query_word_count_dict:
                    # for every word in the current query
                    for word_sq, word_sq_count in query_word_count_dict.items():
                        total_probability = collection_total_similarity[word_sq]
                        if word_sq in query_embedded:
                            cur_word_similarity = word2vec.getWordSimilarity(
                                collection[word], query_embedded[word_sq])
                            p_w_q += (cur_word_similarity / total_probability) * (word_sq_count / query_length)
                # store the probability
                top_prob_dict[word] = p_w_q
            # softmax
            top_prob_dict = ProcDoc.softmax(top_prob_dict)
            # sort top_prob_dict by value (probability)
            top_prob_list = sorted(top_prob_dict.items(), key=operator.itemgetter(1), reverse=True)
            # store the updated query model values
            update_embedded_query_expansion[query] = top_prob_list
        Pickle.dump(update_embedded_query_expansion,
                    open("model/update_embedded_query_expansion_qi.pkl", "wb"), True)

    # update query model
    for update_query, update_query_word_list in update_embedded_query_expansion.items():
        filepath = "visual/" + update_query + "_qi.png"
        if not os.path.isfile(filepath):
            visualization.visualization(collection, update_query_word_list, filepath)
        for update_word, update_count in update_query_word_list[:m]:
            update = update_count
            origin = 0
            if update_word in query_model[update_query]:
                origin = query_model[update_query][update_word]
                query_model[update_query].pop(update_word, None)
            embedded_query_expansion[update_query][update_word] = \
                interpolated_aplpha * origin + (1 - interpolated_aplpha) * update
        for un_changed_word in query_model[update_query].keys():
            embedded_query_expansion[update_query][un_changed_word] *= interpolated_aplpha
        # softmax
        embedded_query_expansion[update_query] = ProcDoc.softmax(embedded_query_expansion[update_query])
    return embedded_query_expansion
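# The update rule used by embedded_query_expansion_qi above, restated for clarity (this
# toy snippet is an illustration with made-up numbers, not part of the original code):
# for each of the top-m expansion terms,
#     p'(w|q) = alpha * p_original(w|q) + (1 - alpha) * p_expansion(w|q)
# every term that is not updated keeps alpha * p_original(w|q), and the whole
# distribution is then re-normalized with a softmax.
alpha = 0.7
origin, update = 0.10, 0.25   # original and expansion probabilities for one word
new_weight = alpha * origin + (1 - alpha) * update
print(new_weight)             # 0.145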
def main():
    dim = (100, 100)
    vis = visualization(dim, 5)
    running = True
    n = 25
    X = [generate_points(dim, box(0, 0, dim[0], dim[1]), 15) for i in range(n)]
    E = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 1), (3, 4), (4, 0)]
    E = get_CDAG(n - 1)
    E = get_minimal_DAG(n - 1)
    D = [None for i in range(n)]
    D = GreedyDistance(X, E, D)
    D_2 = GreedyFeedBack(X, E, [None for i in range(n)])
    print(coverage(D, 10), coverage(D_2, 10))
    t = [0]
    v = [coverage(D, 10)]
    vis.draw_circles(D, 10, "BLACK")
    for x in X:
        vis.draw_circles(x, 2, "RED")
    vis.update()
    reDraw(vis, D, X)
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif event.type == pygame.MOUSEBUTTONUP:
                D = GreedyFeedBack(X, E, D)
                t.append(t[-1] + 1)
                v.append(coverage(D, 10))
                reDraw(vis, D, X)
                plt.plot(v)
                plt.show()
                print("redraw")
def __init__(self):
    self.grid = grid()
    self.__vis = visualization(self.grid.grid)
    self.changedcounter = 0
    self.iteration_counter = 0
    self.reward_in = 0
    self.filled_before = np.count_nonzero(self.grid.grid)
def draw_matchsticks(self, exp_id, camera_id):
    """Draw matchstick skeletons for every frame of the given experiment and camera."""
    v = visualization.visualization()
    exp = experiment.experiment(new_experiment=False, ts=exp_id)
    room_name = exp.metadata["room"]
    if room_name.lower() == "cears":
        room_id = 1
    elif room_name.lower() == "computer_lab":
        room_id = 0
    devices = self.rooms[room_id]["devices"]
    devices.sort()
    try:
        cam_name = os.path.basename(devices[int(camera_id)])
    except Exception as e:
        print(e)
        self.app.logger.info(e)
        return "No cam found sorry!"
    fcamera_path = os.path.join("/dev/v4l/by-id", cam_name)
    nframes = exp.metadata["number_of_images"][fcamera_path]
    ret_combined = ""
    for frame_id in range(nframes):
        ret = self.skeletons(exp_id, camera_id, frame_id)
        ret_combined += "<br>" + ret
    return ret_combined
def index():
    for image_name in glob.iglob(DATABASE_FOLDER + '/*.jpg'):
        os.remove(image_name)

    global images, desc, statistics, extra_statistics, extra_extra_statistics, threshold, file_name
    if len(sys.argv) == 2 and int(sys.argv[1]) == 1:
        file_name = 'general_output_1.txt'
    else:
        file_name = 'general_output.txt'

    images = []
    statistics, extra_statistics, extra_extra_statistics, threshold = calculate_statistics()

    visualization(file=file_name, mode_1='correct', mode_2='before')
    im = imread(DATABASE_FOLDER + 'correct_before.jpg', mode='RGB')
    new_file_name = rand_str(20)
    imsave(DATABASE_FOLDER + new_file_name + '.jpg', im)
    images.append(new_file_name + '.jpg')

    visualization(file=file_name, mode_1='incorrect', mode_2='before')
    im = imread(DATABASE_FOLDER + 'incorrect_before.jpg', mode='RGB')
    new_file_name = rand_str(20)
    imsave(DATABASE_FOLDER + new_file_name + '.jpg', im)
    images.append(new_file_name + '.jpg')

    logistic_regression(file=file_name, mode='before')
    im = imread(DATABASE_FOLDER + 'LR_before_pp.jpg', mode='RGB')
    new_file_name = rand_str(20)
    imsave(DATABASE_FOLDER + new_file_name + '.jpg', im)
    images.append(new_file_name + '.jpg')

    visualization(file=file_name, mode_1='correct', mode_2='after')
    im = imread(DATABASE_FOLDER + 'correct_after.jpg', mode='RGB')
    new_file_name = rand_str(20)
    imsave(DATABASE_FOLDER + new_file_name + '.jpg', im)
    images.append(new_file_name + '.jpg')

    visualization(file=file_name, mode_1='incorrect', mode_2='after')
    im = imread(DATABASE_FOLDER + 'incorrect_after.jpg', mode='RGB')
    new_file_name = rand_str(20)
    imsave(DATABASE_FOLDER + new_file_name + '.jpg', im)
    images.append(new_file_name + '.jpg')

    logistic_regression(file=file_name, mode='after')
    im = imread(DATABASE_FOLDER + 'LR_after_pp.jpg', mode='RGB')
    new_file_name = rand_str(20)
    imsave(DATABASE_FOLDER + new_file_name + '.jpg', im)
    images.append(new_file_name + '.jpg')

    desc = ['Correct prediction before post-processing',
            'Wrong prediction before post-processing',
            'Logistic Regression before post-processing',
            'Correct prediction after post-processing',
            'Wrong prediction after post-processing',
            'Logistic Regression after post-processing']

    return render_template("statistics.html", image_name=images, descriptions=desc,
                           first_thres=threshold[0], second_thres=threshold[1], third_thres=5,
                           statistics=statistics, extra_statistics=extra_statistics,
                           extra_extra_statistics=extra_extra_statistics)
def make_videofrom_matchsticks(self, exp_id, camera_id, fps):
    """Stitch the per-frame matchstick images of a camera into an XVID video."""
    v = visualization.visualization()
    exp = experiment.experiment(new_experiment=False, ts=exp_id)
    room_name = exp.metadata["room"]
    if room_name.lower() == "cears":
        room_id = 1
    elif room_name.lower() == "computer_lab":
        room_id = 0
    devices = self.rooms[room_id]["devices"]
    devices.sort()
    try:
        cam_name = os.path.basename(devices[int(camera_id)])
    except Exception as e:
        print(e)
        self.app.logger.info(e)
        return "No cam found sorry!"
    fcamera_path = os.path.join("/dev/v4l/by-id", cam_name)
    nframes = exp.metadata["number_of_images"][fcamera_path]
    exp_path = self.um.experiment_path(str(exp_id))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    pathout = os.path.join(exp_path, "output/pose/video")
    try:
        self.um.create_folder(pathout)
    except:
        pass
    pathout = os.path.join(pathout, cam_name)
    try:
        self.um.create_folder(pathout)
    except:
        pass
    pathout = os.path.join(pathout, "video.avi")
    print(pathout)
    self.app.logger.info(pathout)
    out = cv2.VideoWriter(pathout, fourcc, fps, (800, 600))
    for frame_id in range(nframes):
        figure = os.path.join(exp_path, "output/pose/img", cam_name,
                              "matchstick_" + str(frame_id) + ".png")
        self.app.logger.info(figure)
        img_ = cv2.imread(figure)
        if img_ is None:
            return "I think you forgot to draw the matchsticks!"
        # cv2.resize returns the resized image; the original discarded the result
        img_ = cv2.resize(img_, (800, 600))
        out.write(img_)
    out.release()
    return "done"
def use_case_fuzzy_cmean(G, users_skills, clusters_ground_truth):
    clustering_range = (2, 10)
    distance_function = "euclidean"

    print("Clustering")
    print("Using Fuzzy C-Means")
    fuzzyclustering_model = fzclustering(users_skills, range(*clustering_range), True)
    # returned values, in order:
    # Cluster centers. Data for each center along each feature provided for every cluster (of the c requested clusters).
    print("- Number of clusters found", len(fuzzyclustering_model[0]))
    print("- Real number of clusters", len(np.unique(clusters_ground_truth)))

    users_distances_to_centers = cdist(users_skills, fuzzyclustering_model[0], metric=distance_function)

    # pca = PCA(n_components=2)
    #
    # pca.fit(users_skills)
    # new_data = pca.transform(users_skills)
    #
    # pca.fit(fuzzyclustering_model[0])
    # new_data2 = pca.transform(fuzzyclustering_model[0])
    # c = np.concatenate((fuzzyclustering_model[1], np.array([6] * len(fuzzyclustering_model[0]))))
    # new_data = np.concatenate((new_data, new_data2), axis=0)
    #
    # axs[1, 1].scatter(new_data.T[0], new_data.T[1], c=c, alpha=0.5)

    # print("Plotting graph")
    # plot_graph(G, "Fuzzy_graph.png", colors=fuzzyclustering_model[1])

    print("Link prediction")
    model, y_train, predicted_train, y_test, predicted_test = link_prediction(
        G, users_distances_to_centers)

    print("Evaluation")
    print("- Train")
    print_evaluate(y_train, predicted_train)
    print("- Test")
    print_evaluate(y_test, predicted_test)

    print("Visualization")
    visualization(model, G, users_distances_to_centers, fuzzyclustering_model[1])
def main():
    # change include read number of columns
    data, labels = read("Homework2_pca_c.txt", 12)
    # data = data.astype(np.float)
    my_pca_res = mypca.pca(data)
    sklearn_pca_res = skap.apply_pca(data)
    sklearn_svd_res = skap.apply_svd(data)
    sklearn_tsne_res = skap.apply_tsne(data)
    vs.visualization(my_pca_res, labels, 'my_pca', 'PC')
    vs.visualization(sklearn_pca_res, labels, 'sklearn_pca', 'PC')
    vs.visualization(sklearn_svd_res, labels, 'sklearn_svd', 'SV')
    vs.visualization(sklearn_tsne_res, labels, 'sklearn_tsne', 'tSNE')
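# A sketch of what a helper with the signature visualization(result, labels, name, axis_prefix)
# used above could look like; this is an assumption for illustration, the real vs.visualization
# is not shown here. It scatters the first two components, colors by class label, and saves a
# figure named after the method.
import numpy as np
import matplotlib.pyplot as plt

def visualization_sketch(result, labels, name, axis_prefix):
    result = np.asarray(result)
    labels = np.asarray(labels)
    for label in np.unique(labels):
        mask = labels == label
        plt.scatter(result[mask, 0], result[mask, 1], label=str(label), alpha=0.7)
    plt.xlabel(axis_prefix + "1")
    plt.ylabel(axis_prefix + "2")
    plt.title(name)
    plt.legend()
    plt.savefig(name + ".png")
    plt.close()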
def extractFeatures(directory):
    # `num` is expected to be defined at module level (which dataset folder to process)
    for x in range(num, num + 1):
        X = open("train/train_x" + str(num) + ".data", "w")
        Y = open("train/train_y" + str(num) + ".data", "w")
        path = directory + str(x)
        label_file_path = path + '/activityLabel.txt'
        activityLabelDict = getLabelDict(label_file_path)
        files = []
        for file in os.listdir(path):
            if file.endswith(".txt"):
                files.append(file)
        i = 0
        for file_name in files:
            print(i, file_name)
            i = i + 1
            if file_name != 'activityLabel.txt':
                file_path = path + '/' + file_name
                v = visualization.visualization(file_path, 5)
                trans = False
                activity_num = np.array([activityToClassNumDict[activityLabelDict[file_name]]])
                createDescriptorAndSave(v, X, Y, activity_num, trans)
def run(para):
    # Create an object for the input data
    econ_data = input_data.elec_consume_data(para)
    # Based on the parameters, split the data into train and test sets
    Xtrain, Ytrain, Xtest, Ytest = econ_data.process_input()

    # Create an object for the model
    e_model = econ_model.econ_model(para, Xtrain, Ytrain, Xtest, Ytest)
    # Create a model
    model = e_model.create_model()
    model.summary()

    # Create a chart object
    chart = visualization.visualization()

    # Train and evaluate the model
    if para.need_to_train:
        _, history = e_model.train()          # train the model
        chart.display_train_loss(history)     # show train and validation loss
        e_model.save_model()                  # save the model and weights
        e_model.evaluate()                    # evaluate on the test data with the MSR metric

    # Predict
    if para.need_to_predict:
        Ytest, prediction = e_model.predict()  # return real values and predictions
        print("prediction shape: ", prediction.shape)
        # Invert the scaling back to the original values
        inversed_pred = econ_data.inverse_scaled_data(prediction)
        inversed_Ytest = econ_data.inverse_scaled_data(Ytest)
        # Show sum and mean of the difference between the inversed real values and predictions
        diff_sum, diff_mean = chart.calc_diff(inversed_Ytest, inversed_pred)
        print("diff_sum, diff_mean: ", diff_sum, diff_mean)
        # Save the two series to a csv file
        chart.save_prediction(inversed_Ytest, inversed_pred)
        # Display a chart of them
        chart.display_elect_pred(inversed_pred, inversed_Ytest)
def skeletons(self, exp_id, cam_id, frame_id):
    """Test skeletons."""
    visualizer = visualization.visualization()
    exp = experiment.experiment(new_experiment=False, ts=exp_id)
    room_name = exp.metadata["room"]
    if room_name.lower() == "cears":
        room_id = 1
    elif room_name.lower() == "computer_lab":
        room_id = 0
    devices = self.rooms[room_id]["devices"]
    devices.sort()
    try:
        cam_name = os.path.basename(devices[int(cam_id)])
    except:
        return "No cam found sorry!"
    exp_path = self.um.experiment_path(str(exp_id))
    pose_detection_result = "output/pose"
    json = "pose"
    img = "img"
    fname = os.path.join(exp_path, pose_detection_result, json, cam_name,
                         str(frame_id) + ".png.json")
    output_fname = os.path.join(exp_path, pose_detection_result, img, cam_name,
                                "matchstick_" + str(frame_id) + ".png")
    print(fname)
    self.app.logger.info(fname)
    if self.um.check_file_exists(fname):
        json_data = self.um.read_json(fname)
        people_in_frame = people(json_data, frame_id)
        visualizer.draw_matchsticks(people_in_frame, output_fname)
        npeople = str(len(people_in_frame.list))
    else:
        npeople = 0
    return "Number of people drawn: {n}".format(n=npeople)
def main():
    try:
        while True:
            print("\n Please follow the instruction.")
            print("\n Please enter a year between 1800 and 2012 to plot, or enter 'quit' to exit the program.")
            var1 = raw_input()
            match = re.search(r"^[0-9]{4}", var1)
            if match and var1 == match.group():
                try:
                    data_in_given_year(int(var1)).annual_income()
                except KeyError:
                    print("Invalid input: please select a year between 1800 and 2012.")
            elif var1 == "quit":
                sys.exit()
            print("\n Enter 'next' to stop and get specific plots for 2007 to 2012, or enter 'quit' to exit the program.")
            var1 = raw_input()
            if var1 == "next":
                break
        year_list = range(2007, 2013)
        # Question 8
        for year in year_list:
            output = v.visualization(year)
            output.histogram_plotting()
            output.boxplot_plotting()
        print("\n Figures have been saved in the current directory.")
    except KeyboardInterrupt:
        print("Oops, interruption.")
    except TypeError:
        print("Oops, invalid type.")
    except ValueError:
        print("Oops, invalid value.")
    except KeyError:
        print("Oops, invalid year.")
def main():
    m1 = 1.0
    m2 = 1.0
    l1 = 1.0
    l2 = 1.0
    q1 = np.pi / 4
    q2 = np.pi / 4
    initial_state = np.array([[q1], [q2]])

    # Initializing pendubot
    pendubot = pendubot_dynamic_system(l1, l2, m1, m2, initial_state)

    # Initializing visualization
    vis = visualization(l1, l2, initial_state)

    # Running simulation
    start_time = time.time()
    print("Simulation started at ", start_time)
    ddq, dq, q = pendubot.integrate_one_step(0.0, 0.0)
    vis.plot(q)
    while True:
        time_step = time.time() - start_time
        ddq, dq, q = pendubot.integrate_one_step(0.0, 0.01)
        vis.animate(q)
if args.domain is None:
    args.domain = find_domain(args.problem)
else:
    args.domain = os.path.abspath(args.domain)

search = SEARCHES[args.search]
heuristic = HEURISTICS[args.heuristic]

if args.search in ['bfs', 'ids', 'sat', 'dfs']:
    heuristic = None

logging.info('using search: %s' % search.__name__)
logging.info('using heuristic: %s' % (heuristic.__name__ if heuristic else None))
use_preferred_ops = (args.heuristic == 'hffpo')
solution = search_plan(args.domain, args.problem, search, heuristic,
                       use_preferred_ops=use_preferred_ops)

if solution is None:
    logging.warning('No solution could be found')
else:
    solution_file = args.problem + '.soln'
    logging.info('Plan length: %s' % len(solution))
    _write_solution(solution, solution_file)
    validate_solution(args.domain, args.problem, solution_file)
if args.search in ['bfs', 'dfs']:
    vs.visualization(node_data)
# run the simulation 200 times
for n in range(200):
    # Indicate the number of the iteration
    print("#", n, " iteration")

    # Initialize the user list and intimacy map coordinates
    init()
    init_graph()
    init_stat()

    # Initialize users
    beginning(init_S, init_T, alloc, cred, size)

    # Plot the initial snapshot of the system
    visualization('Begin.pdf')

    # Schedule interactions for all active users
    makeComb()

    # Run the simulation
    Runsim(sim_time)  # The time of simulation can be adjusted

    # Derive the statistics of the network (to be extended after the checkpoint)
    untouched = 0
    touched = 0
    numS = 0
    numT = 0
    num_all = len(admin.all_user)
    for i in admin.all_user:
        if i.value == 0:
            untouched += 1
        self.name = name
        self.year = year
        self.length = length
        self.tracks = tracks

    def __repr__(self) -> str:
        return f'{self.name}, {self.year}'


if __name__ == '__main__':
    albums: List[Album] = [
        Album("Got to Be There", 1972, 35.45, 10),
        Album("Ben", 1972, 31.31, 10),
        Album("Music & Me", 1973, 32.09, 10),
        Album("Forever, Michael", 1975, 33.36, 10),
        Album("Off the Wall", 1979, 42.28, 10),
        Album("Thriller", 1982, 42.19, 9),
        Album("Bad", 1987, 48.16, 10),
        Album("Dangerous", 1991, 77.03, 14),
        Album("HIStory: Past, Present and Future, Book I", 1995, 148.58, 30),
        Album("Invincible", 2001, 77.05, 16)
    ]
    kmeans: KMeans = KMeans(2, albums)
    clusters: List[KMeans.Cluster] = kmeans.run()
    for index, cluster in enumerate(clusters):
        print(
            f'Cluster {index} Avg Length {cluster.centroid.dimensions[0]}'
            f' Avg Tracks {cluster.centroid.dimensions[1]}: {cluster.points}\n'
        )
    visualization(clusters, 'length', 'tracks')
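# A hypothetical sketch of the visualization(clusters, x_attr, y_attr) call used above; the
# real implementation is not shown here. Assuming each cluster exposes a .points list of
# Album objects, it plots every cluster's points by the two named attributes, one color per
# cluster.
import matplotlib.pyplot as plt

def visualization_sketch(clusters, x_attr, y_attr):
    for index, cluster in enumerate(clusters):
        xs = [getattr(point, x_attr) for point in cluster.points]
        ys = [getattr(point, y_attr) for point in cluster.points]
        plt.scatter(xs, ys, label=f"Cluster {index}")
    plt.xlabel(x_attr)
    plt.ylabel(y_attr)
    plt.legend()
    plt.show()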
from glassdoor_scrapper import glassdoor
from stock_financials import stock_financials
from visualization import visualization

output = dict()
company = input("Enter Company name:")
ticker = input("Enter Company ticker symbol:")

gd = glassdoor()
url = gd.glassdoor_login_navigate(company)
output.update(gd.glassdoor_scrapping(url))

s = stock_financials()
output.update(s.get_stock_financials(ticker))

v = visualization()
v.generate_report(output)
class kerasGeneratorWrapper:
    # maybe we can implement for ..
    pass


if __name__ == '__main__':
    import pickle
    from visualization import visualization
    import matplotlib.pyplot as plt

    # test code for data augmentation
    with open('testGen.pickle', 'rb') as f:
        file = pickle.load(f)
    image = Image.open(file['image'])
    bboxes = file['bboxes'].copy()
    image, bboxes[:, :4] = resizeImage(image, bboxes[:, :4], dims=[800, 600])
    visImage = visualization(image, bboxes[:, :4], np.arange(bboxes.shape[0]))
    baseAnchors = genBaseAnchors(16, [0.5, 1.0, 2.0], 2**np.arange(3, 6))
    labels, targetBBoxes = genAnchorLabel([800, 600], 16, bboxes[:, :4], baseAnchors, batchsize=128)
    plt.imshow(visImage)
    plt.show()

    # parameters = {'angle': 45, 'shift': 10}
    # dataAugmentation(image, bboxes[:, :4], parameters)

    # test code for Generator v1 (primary work)
    """
    gen = Generator(data, [800, 600], batchsize=64, isSample=True)
    sumP, sumN = 0, 0
    for i in range(1000):
def main(): print("main") area_dimensions = (50,50) vis = visualization(area_dimensions,5) running = True #points = generate_points(area_dimensions,box(0,0,area_dimensions[0],area_dimensions[1]),20) #points = generate_points(area_dimensions,box(0,0,100,100),20) #coverage(points,10) #vis.draw_cirles(points,25,"BLACK") #picked = Greedy_Select_Points(points,5) n =3 total_X = [] E1 = [ (0,1), (0,2), (0,3), (1,2), (1,3), (2,3) ] E1 = [] for i in range(16): for j in range(i+1,16+1): E1.append((i,j)) #print(E1) #exit() E2 = [] for i in range(1,16): E2.append((i-1,i)) print(E2) print(E1) #exit() """ exit() E2 = [ (0,1), (1,2), (2,3), (3,4), (4,5), (5,6), (6,7), (7,8), (8,9) """ avgNoReplace =0 avgReplace =0 for i in range(100): X = [] for i in range(n): for j in range(n): #X_i =generate_points(area_dimensions,box(i*(area_dimensions[0]/n),j*(area_dimensions[1]/n),(i+1)*(area_dimensions[0]/n),(j+1)*(area_dimensions[1]/n)),5) X_i =generate_points(area_dimensions,box(0,0,area_dimensions[0],area_dimensions[1]),10) X.append(X_i) total_X.extend(X_i) """ for x in X: vis.draw_cirles(x,1,"BLACK") """ vis.update() S1 = limited_information_greedy(X,E1) S2 = limited_information_greedy(X,E2) #S3 = limited_information_greedy_with_replacement(X,E1) S4 = limited_information_greedy_with_replacement(X,E2) avgNoReplace += (coverage(S2,10)/coverage(S1,10))/100 avgReplace += (coverage(S4,10)/coverage(S1,10))/100 results = "no replace average {0}, replace average{1}".format(avgNoReplace,avgReplace) print(results) """ results ="No Replacement for E1 Coverage = {0}, for E2 Coverage = {1}" results2 = "Replacemment for E1 Coverage = {0}, for E2 Coverage = {1}" results = results.format(coverage(S1,10),coverage(S2,10)) results2 = results2.format(coverage(S3,10),coverage(S4,10)) """ vis.draw_cirles(S1,10,"GREEN") vis.update() vis.draw_cirles(S2,9,"BLUE") vis.update() vis.draw_cirles(S3,8,"RED") vis.update() vis.draw_cirles(S4,7,"PURPLE") vis.update() while running: for event in pygame.event.get(): if event.type == pygame.QUIT: running = False
def run():
    dims = (100, 100)
    vis = visualization(dims, 5)
    n = 30
    m = 10
    a = 400
    b = 500
    print("------------Distributed Submodular Test -----------")
    print("# of agents = ", n)
    print("|X_i| = ", m)
    print("Minimum Sensor Area = ", a)
    print("Maximum Sensor Area = ", b)
    print("dimensions of area ", dims[0], "x", dims[1])
    print("ground sets are uniformly distributed over the area")
    print("---------------------Executing --------------------")
    Xn = []
    for i in range(n):
        # Xn.append([{'x': random.gauss(1, 1) * 10, 'y': random.gauss(1, 1) * 10, 'r': random.uniform(sqrt(a / pi), sqrt(b / pi))} for i in range(m)])
        Xn.append([{
            'x': random.random() * dims[0],
            'y': random.random() * dims[1],
            'r': random.uniform(sqrt(a / pi), sqrt(b / pi))
        } for i in range(m)])
    graph1 = [[]]
    graph2 = [[]]
    for i in range(1, n):
        graph1.append([j for j in range(i)])
    for i in range(1, n):
        graph2.append([j for j in range(i)])
        while len(graph2[i]) > n - i:
            graph2[i].pop(0)
    sim = submodular_sim()
    Sg1 = sim.distributed_greedy(Xn, graph1)
    Su = sim.distributed_upperbound_greedy(Xn)
    Sl = sim.distributed_lowerbound_greedy(Xn)
    compute_upper_lower_marginal(Su, sim)
    compute_upper_lower_marginal(Sl, sim)
    compute_upper_lower_marginal(Sg1, sim)
    W = compute_similarity_matrix(Xn, n, m, a, b)
    print(" 1/n||W||", (1 / n) * np.linalg.norm(W, float("inf")))
    eps = (1 / (m * n)) * np.linalg.norm(W, float("inf"))
    gamma = eps / (1 - eps)
    weights = []
    for i in range(n):
        for j in range(i):
            d = dist(Sl[i], Sl[j])
            weights.append(similarity_weight(d, a, b))
    print("sum of weights:", sum(weights))
    # alpha = sum(weights)
    print("1+gamma", 1 + gamma)
    print(W)
    exit()
    plt.show()
    print("Actual Value:", sim.f(Su))
    graph1 = [[]]
    graph2 = [[]]
    for i in range(1, n):
        graph1.append([j for j in range(i)])
    for i in range(1, n):
        graph2.append([j for j in range(i)])
        while len(graph2[i]) > n - i:
            graph2[i].pop(0)
    Sg1 = sim.distributed_greedy(Xn, graph1)
    print("full graph greedy:", sim.f(Sg1))
    """
def main():
    # Initialize the network
    if args.option == 'default':
        ResNet = networks.__dict__[args.arch]('D').to(device)
    else:
        ResNet = networks.__dict__[args.arch](args.option).to(device)
    print(ResNet)
    summary(ResNet, input_size=(3, 32, 32))

    # Initialize train/test set
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])  # ImageNet statistics
    train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                          transforms.RandomCrop(32, padding=4, padding_mode='edge'),
                                          transforms.ToTensor(),
                                          normalize])
    test_transform = transforms.Compose([transforms.ToTensor(), normalize])
    train_set = torchvision.datasets.CIFAR10(root='./CIFAR10', train=True, download=True, transform=train_transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=args.bs, shuffle=True, num_workers=args.workers)
    test_set = torchvision.datasets.CIFAR10(root='./CIFAR10', train=False, download=True, transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=args.bs, shuffle=False, num_workers=args.workers)

    # Initialize torch gradient setup
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(ResNet.parameters(), lr=args.lr, weight_decay=args.wd)
    milestones = np.linspace(0, args.epochs, args.milestones + 2)[1:-1].astype(int).tolist()
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=args.lr_decay)

    # Initialize statistics for training
    # epoch_time = []
    # loading_time = []
    train_hist = []

    # record gradient norm
    if args.gn:
        gradient_recorder = utils.GradientNorm(ResNet)
    else:
        gradient_recorder = None

    # Train-test
    tic = time.time()
    for epoch in range(args.epochs):
        train_loss, train_acc = train(epoch, ResNet, train_loader, criterion, optimizer, gradient_recorder)
        scheduler.step()
        test_loss, test_acc = test(ResNet, test_loader, criterion)
        train_hist.append([train_loss, train_acc, test_loss, test_acc])
    toc = time.time()
    print('Total training time: {:.2f}s'.format(toc - tic))

    # record gradient norm
    if args.gn:
        gradient_norm_hist = gradient_recorder.get_graidnet_norm_hist()
        gradient_norm_hist = np.array(gradient_norm_hist) / len(train_loader)
    else:
        gradient_norm_hist = None

    # statistics and save checkpoint
    train_hist = np.array(train_hist)
    # for i in range(100):
    #     save_dir = './Results/' + args.arch + '_' + str(i + 1)
    #     if not os.path.exists(save_dir):
    #         os.makedirs(save_dir)
    #         break
    if args.dir == 'default':
        save_dir = './Results/' + args.arch
    else:
        save_dir = args.dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    activation = evaluation.calcu_layer_responses(None, ResNet)
    torch.save({'state_dict': ResNet.state_dict(),
                'train_hist': train_hist,
                'gradient_hist': gradient_norm_hist,
                'activation': activation},
               os.path.join(save_dir, 'model.th'))

    # visualization
    visualization.visualization(save_dir)
def transform():
    form = flask.request.form
    service = seq2seq_service if form["model"] == "seq2seq" else cyclegan_service
    if form["model"] == "cyclegan":
        model_name = None
        direction = None
        if form["srcGenre"] == "jazz" and form["tarGenre"] == "pop":
            model_name = "jazz_pop"
            direction = "AtoB"
        elif form["srcGenre"] == "pop" and form["tarGenre"] == "jazz":
            model_name = "jazz_pop"
            direction = "BtoA"
        elif form["srcGenre"] == "jazz" and form["tarGenre"] == "classic":
            model_name = "jazz_classic"
            direction = "AtoB"
        elif form["srcGenre"] == "classic" and form["tarGenre"] == "jazz":
            model_name = "jazz_classic"
            direction = "BtoA"
        elif form["srcGenre"] == "classic" and form["tarGenre"] == "pop":
            model_name = "classic_pop"
            direction = "AtoB"
        elif form["srcGenre"] == "pop" and form["tarGenre"] == "classic":
            model_name = "classic_pop"
            direction = "BtoA"
        else:
            print("Unsupported translate action!")
            return ""
    else:
        target_style = form["tarGenre"]

    basename = ""
    if form["type"] == "select":
        f = form["filePath"]
        shutil.copy2(f, input_folder)
        basename = pathlib.Path(f).name
    else:
        file = flask.request.files["file"]
        basename = file.filename
        fullname = input_folder + file.filename
        file.save(fullname)

    if basename.endswith("mid"):
        # convert into npy
        convert_to_npy(input_folder + basename)

    basename_npy = basename[:-3] + "npy"
    visualization.visualization(input_folder + basename_npy, tmp_folder + "wav.png")

    b = pathlib.Path(basename)
    output_name = output_folder + b.stem + "_transfer.mid"
    input_name = input_folder + basename
    if form["model"] == "cyclegan":
        service.run_file(input_folder, output_folder, model_name, direction)
    else:
        service.run_file(input_name, output_name, target_style)

    return json.dumps({
        "image": tmp_folder + "wav.png",
        "music": output_name
    })
# Store the auc results
aucs.append(auc_val)
pass_list = ["hl6"] + selected_efps
ados_list = [np.nan] + ados
efp_df = pd.DataFrame({
    "efp": pass_list,
    "auc": aucs,
    "ado": ados_list
})
efp_df.to_csv(f"{it_dir}/selected_efps.csv")

# Isolate random dif-order pairs
idx0, idx1 = isolate_order(ix, 6000)

# Check ado with each EFP for most similar DO on dif-order pairs
check_efps(ix)

# Get the max EFP and save it
efp_max, ado_max = get_max_efp(ix)
selected_efps.append(efp_max)
print(selected_efps)
ados.append(ado_max)

# Make plots
viz = visualization(it_dir, ix)
viz.dif_order_hist_plots()
# viz.nn_design_heatmap()
viz.performance_plot()
viz.clear_viz()
ix += 1
# --------------------------------------- FUNCTION SPACE --------------------------------------------------#
def normalize(histogram):
    for i in range(len(histogram)):
        mean = np.mean(histogram[i])
        var = np.var(histogram[i])
        histogram[i] = (histogram[i] - mean) / var
    return histogram
# ---------------------------------------------------------------------------------------------------------#

FOURIER_COEFF_LENGTH = 60

v = visualization.visualization(
    '/media/arya/54E4C473E4C458BE/Action_dataset/data1/0512164800.txt', 3)
histogram = hist.createHistogram(v)
histogram = normalize(histogram)

feature_vector = np.zeros((len(histogram) * FOURIER_COEFF_LENGTH), dtype='float')
for i in range(len(histogram)):
    fourier_pyramid = fourier_ag.fourier(histogram[i], 2, 20)
    fourier_pyramid.createFeature()
    s = FOURIER_COEFF_LENGTH * i
    e = FOURIER_COEFF_LENGTH * (i + 1)
    feature_vector[s:e] = fourier_pyramid.feature_out

print(feature_vector.shape)
refresh_time = df1['refresh_time'].item()  # read the refresh interval

df2 = pd.read_csv("./csv_data/keywords.csv", header=0, encoding='utf-8-sig')
keywords_list = []
for item in df2['keywords']:
    keywords_list.append(item)

is_open = False  # flag recording whether the page has been opened: open it if not, otherwise just refresh it

while True:
    zhihu = Zhihu()  # Zhihu is a class defined elsewhere
    zhihu.login()
    zhihu.crawl()
    print("\n\nCrawling finished\n\n")
    visualization()
    print("\n\nVisualization finished")

    # If the visualization page is already open, refresh it; otherwise open it
    if not is_open:
        open_local_html()
        is_open = True
    else:
        refresh_local_html()

    t = refresh_time

    # Check whether any keywords appear!
    with open("ZhihuRanking.txt", 'r', encoding='utf-8') as f:
        lines = f.readlines()
        cnt = 0        # index of the line currently being read
        cnt_list = []  # records which lines contain a keyword; passed later to customized_visualization()
        for line in lines:
# Store the auc results
aucs.append(auc_val)
pass_list = ["hl6"] + selected_efps
ados_list = [np.nan] + ados
efp_df = pd.DataFrame({
    "efp": pass_list,
    "auc": aucs,
    "ado": ados_list
})
efp_df.to_csv(run_path / "selected_efps.csv")

# Isolate random dif-order pairs
isolate_order(ix=ix, N_pairs=5e7)

# Check ado with each EFP for most similar DO on dif-order pairs
check_efps(ix)

# Get the max EFP and save it
efp_max, ado_max = get_max_efp(ix)
selected_efps.append(efp_max)
print(f"Selected EFPs in Pass {ix}")
print(selected_efps)
ados.append(ado_max)

# Make plots
viz = visualization(run_path, ix)
viz.dif_order_hist_plots()
viz.performance_plot()
viz.clear_viz()
ix += 1
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import numpy as np
from visualization import visualization

data = load_iris()
y = data.target
X = data.data

pca = PCA(n_components=3)
reduce_X = pca.fit_transform(X)
visualization(y, reduce_X)
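# A minimal sketch of what the imported visualization(y, reduce_X) might do here; this is an
# assumption for illustration, since the actual function is not shown. It draws a 3D scatter
# of the three principal components colored by the iris target.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (needed on older matplotlib versions)

def visualization_sketch(y, reduce_X):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(reduce_X[:, 0], reduce_X[:, 1], reduce_X[:, 2], c=y)
    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    ax.set_zlabel("PC3")
    plt.show()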
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if int(args.double_precision):
        torch.set_default_dtype(torch.float64)
    if int(args.cuda) >= 0:
        torch.cuda.manual_seed(args.seed)
    args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu'
    args.patience = args.epochs if not args.patience else int(args.patience)
    logging.getLogger().setLevel(logging.INFO)
    if args.save:
        if not args.save_dir:
            dt = datetime.datetime.now()
            date = f"{dt.year}_{dt.month}_{dt.day}"
            models_dir = os.path.join(os.environ['LOG_DIR'], args.task, date)
            save_dir = get_dir_name(models_dir)
        else:
            save_dir = args.save_dir
        logging.basicConfig(level=logging.INFO,
                            handlers=[
                                logging.FileHandler(os.path.join(save_dir, 'log.txt')),
                                logging.StreamHandler()
                            ])
    logging.info(f'Using: {args.device}')
    logging.info("Using seed {}.".format(args.seed))

    # Load data
    data = load_data(args, os.path.join(os.environ['DATAPATH'], args.dataset))
    args.n_nodes, args.feat_dim = data['features'].shape
    if args.task == 'nc':
        Model = NCModel
        args.n_classes = int(data['labels'].max() + 1)
        logging.info(f'Num classes: {args.n_classes}')
    elif args.task == 'dr':
        Model = DRModel
        args.n_classes = int(2)
        logging.info(f'Dimension reduction Num classes: {args.n_classes}')
    else:
        args.nb_false_edges = len(data['train_edges_false'])
        args.nb_edges = len(data['train_edges'])
        if args.task == 'lp':
            Model = LPModel
        else:
            Model = RECModel
            # No validation for reconstruction task
            args.eval_freq = args.epochs + 1

    if not args.lr_reduce_freq:
        args.lr_reduce_freq = args.epochs

    # Model and optimizer
    model = Model(args)
    logging.info(str(model))
    optimizer = getattr(optimizers, args.optimizer)(params=model.parameters(),
                                                    lr=args.lr,
                                                    weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=int(args.lr_reduce_freq),
                                                   gamma=float(args.gamma))
    tot_params = sum([np.prod(p.size()) for p in model.parameters()])
    logging.info(f"Total number of parameters: {tot_params}")
    if args.cuda is not None and int(args.cuda) >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
        model = model.to(args.device)
        for x, val in data.items():
            if torch.is_tensor(data[x]):
                data[x] = data[x].to(args.device)

    # Train model
    t_total = time.time()
    counter = 0
    best_val_metrics = model.init_metric_dict()
    best_test_metrics = None
    best_emb = None
    for epoch in range(args.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        embeddings = model.encode(data['features'], data['adj_train_norm'])
        train_metrics = model.compute_metrics(embeddings, data, 'train')
        train_metrics['loss'].backward()
        if args.grad_clip is not None:
            max_norm = float(args.grad_clip)
            all_params = list(model.parameters())
            for param in all_params:
                torch.nn.utils.clip_grad_norm_(param, max_norm)
        optimizer.step()
        lr_scheduler.step()
        if (epoch + 1) % args.log_freq == 0:
            logging.info(" ".join([
                'Epoch: {:04d}'.format(epoch + 1),
                'lr: {}'.format(lr_scheduler.get_lr()[0]),
                format_metrics(train_metrics, 'train'),
                'time: {:.4f}s'.format(time.time() - t)
            ]))
        # if (epoch + 1) % args.eval_freq == 0:
        #     model.eval()
        #     embeddings = model.encode(data['features'], data['adj_train_norm'])
        #     val_metrics = model.compute_metrics(embeddings, data, 'val')
        #     if (epoch + 1) % args.log_freq == 0:
        #         logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
        #     if model.has_improved(best_val_metrics, val_metrics):
        #         best_test_metrics = model.compute_metrics(embeddings, data, 'test')
        #         best_emb = embeddings.cpu()
        #         if args.save:
        #             np.save(os.path.join(save_dir, 'embeddings.npy'), best_emb.detach().numpy())
        #         best_val_metrics = val_metrics
        #         counter = 0
        #     else:
        #         counter += 1
        #         if counter == args.patience and epoch > args.min_epochs:
        #             logging.info("Early stopping")
        #             break

    logging.info("Optimization Finished!")
    logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    if not best_test_metrics:
        model.eval()
        best_encode = model.encode(data['features'], data['adj_train_norm'])
        best_emb = model.decode(best_encode, data['adj_train_norm'], data['idx_train'])
        best_test_metrics = model.compute_metrics(best_encode, data, 'train')
    logging.info(" ".join(["Val set results:", format_metrics(best_val_metrics, 'val')]))
    logging.info(" ".join(["Test set results:", format_metrics(best_test_metrics, 'test')]))
    if args.save:
        np.save(os.path.join(save_dir, 'embeddings.npy'), best_emb.cpu().detach().numpy())
        np.save(os.path.join(save_dir, 'labels.npy'), data['labels'])
        if hasattr(model.encoder, 'att_adj'):
            filename = os.path.join(save_dir, args.dataset + '_att_adj.p')
            pickle.dump(model.encoder.att_adj.cpu().to_dense(), open(filename, 'wb'))
            print('Dumped attention adj: ' + filename)
        json.dump(vars(args), open(os.path.join(save_dir, 'config.json'), 'w'))
        torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
        logging.info(f"Saved model in {save_dir}")
    save_fig = True
    if save_fig:
        visualization(save_dir)
# Generate and save
def generate_and_save(model, epoch, test_input):
    # Note that `training` is set to False,
    # so all layers run in inference mode (batchnorm).
    if use_gpu:
        test_input = test_input.cuda()
    # model.eval()
    with torch.no_grad():
        predictions = model(test_input)
    predictions = predictions.cpu().numpy() * 127.5 + 127.5  # restore the original value range
    predictions = predictions.reshape(predictions.shape[0], 15).astype(np.int)
    df = pd.DataFrame(predictions)
    df.to_csv('output' + timestamp + '.csv', mode='a', header=False, index=None)


for it in range(1, 20):
    print(it)
    # Batch and shuffle the data
    data = ColorDataset(train_data)
    dataloader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True)
    # If shuffle's buffer_size equals the number of samples, the whole dataset is shuffled
    train(dataloader, EPOCHS)  # train the model and inspect the results of the last run

tb.close()
visualization('output' + timestamp)
def embedded_query_expansion_ci(query_embedded, query_wordcount, collection, collection_total_similarity, word2vec, interpolated_aplpha, m):
    # load query model
    query_model = Pickle.load(open("model/query_model.pkl", "rb"))
    embedded_query_expansion = query_model

    update_embedded_query_expansion = {}
    if os.path.isfile("model/update_embedded_query_expansion_ci.pkl"):
        # the expansion file already exists, reuse it
        update_embedded_query_expansion = Pickle.load(
            open("model/update_embedded_query_expansion_ci.pkl", "rb"))
    else:
        # calculate every query
        for query, query_word_count_dict in query_wordcount.items():
            top_prob_dict = {}
            # calculate every word in collection
            for word in collection.keys():
                total_probability = collection_total_similarity[word]
                p_w_q = 0
                if not word in query_word_count_dict:
                    p_w_q = total_probability
                    # p(w|q): total probability theory (for every query term)
                    for query_term in query_word_count_dict.keys():
                        if query_term in query_embedded:
                            cur_word_similarity = word2vec.getWordSimilarity(
                                query_embedded[query_term], collection[word])
                            p_w_q *= (cur_word_similarity / total_probability)
                # store the probability
                top_prob_dict[word] = p_w_q
            # softmax
            top_prob_dict = ProcDoc.softmax(top_prob_dict)
            # sort top_prob_dict by value (probability)
            top_prob_list = sorted(top_prob_dict.items(), key=operator.itemgetter(1), reverse=True)
            update_embedded_query_expansion[query] = top_prob_list
        # store the updated expansion
        Pickle.dump(update_embedded_query_expansion,
                    open("model/update_embedded_query_expansion_ci.pkl", "wb"), True)

    # update query model
    for update_query, update_query_word_list in update_embedded_query_expansion.items():
        filepath = "visual/" + update_query + "_ci.png"
        if not os.path.isfile(filepath):
            visualization.visualization(collection, update_query_word_list, filepath)
        for update_word, update_count in update_query_word_list[:m]:
            update = update_count
            origin = 0
            if update_word in query_model[update_query]:
                origin = query_model[update_query][update_word]
                query_model[update_query].pop(update_word, None)
            embedded_query_expansion[update_query][update_word] = \
                interpolated_aplpha * origin + (1 - interpolated_aplpha) * update
        for un_changed_word in query_model[update_query].keys():
            embedded_query_expansion[update_query][un_changed_word] *= interpolated_aplpha
        # softmax
        embedded_query_expansion[update_query] = ProcDoc.softmax(embedded_query_expansion[update_query])
    return embedded_query_expansion