import heapq

import pandas as pd

import utils

# `urls` is a module-level mapping from document id to Wikipedia URL,
# defined elsewhere in the project.


def search_engine_3(query_match):
    print('In which year was the movie released?')
    year_user = int(input())

    # Rank the results by closeness to a given year
    years = utils.year_docs(query_match)
    sim_years = utils.sim_docs(years, year_user)

    df = pd.DataFrame(columns=['Title', 'Intro', 'Wikipedia Url', 'Similarity'])
    for sim in heapq.nlargest(5, sim_years.items(), key=lambda i: i[1]):
        i = sim[0]  # document_id
        with open('webpages/tsv/output_%d.tsv' % i) as f:
            fields = f.read().split('\n\n')[1].split('\t')
        title = fields[3].encode('utf8').decode('unicode_escape')
        intro = fields[1].encode('utf8').decode('unicode_escape')
        link = urls[str(i + 1)]
        new_row = {'Title': title, 'Intro': intro, 'Wikipedia Url': link,
                   'Similarity': sim[1]}
        # append the row (DataFrame.append was removed in pandas 2.0)
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    # Visualization of the top 5 documents related to the query
    d = dict(selector="th", props=[('text-align', 'center')])
    df1 = df.sort_values(by=['Similarity'], ascending=False)
    df1.style.format({'Wikipedia Url': utils.make_clickable}) \
        .hide(axis="index") \
        .set_table_styles([d]) \
        .set_properties(**{'text-align': 'center'}) \
        .set_properties(subset=['Title'], **{'width': '130px'})

    # Bonus: co-stardom network of the top 10 matches
    movies = [movie[0] for movie in heapq.nlargest(10, sim_years.items(),
                                                   key=lambda i: i[1])]
    G = utils.add_nodes(movies)
    G = utils.add_edges(G)
    network = utils.draw_graph(G)
    return df, network

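The ranking helpers are not shown in this snippet. Below is a minimal sketch of a year-closeness score consistent with the "rank by closeness to a given year" comment, assuming `utils.year_docs` returns a dict mapping document ids to release years; the decay formula itself is an assumption, not the project's actual scoring.

def sim_docs(years, year_user):
    """Score each document by how close its release year is to the query year.

    Sketch only: maps {doc_id: year} to {doc_id: similarity in (0, 1]},
    where an exact match scores 1 and the score decays with distance.
    """
    return {doc_id: 1.0 / (1.0 + abs(year - year_user))
            for doc_id, year in years.items()}
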
def main():
    from pascal.pascal_helpers import load_pascal
    from datasets.pascal import PascalSegmentation
    from utils import add_edges
    from scipy.misc import imsave
    from skimage.segmentation import mark_boundaries

    # get_km_segments is provided by the surrounding project, not imported here
    ds = PascalSegmentation()
    data = load_pascal("train1")
    data = add_edges(data, independent=False)
    # X, Y, image_names, images, all_superpixels = load_data(
    #     "train", independent=False)
    for x, name, sps in zip(data.X, data.file_names, data.superpixels):
        segments = get_km_segments(x, ds.get_image(name), sps, n_segments=25)
        boundary_image = mark_boundaries(
            mark_boundaries(ds.get_image(name), sps), segments[sps],
            color=[1, 0, 0])
        imsave("hierarchy_sp_own_25/%s.png" % name, boundary_image)

# `patents`, `patent_classification`, and `uspatentcitations` are loaded
# earlier via utils.read_* helpers, as in the companion whole-graph script.
patents = utils.merge_patents_and_classification(patents,
                                                 patent_classification)
delta = 10
train_percentage = 0.8
my_range = utils.Range(delta, train_percentage,
                       patents['date'].min(), patents['date'].max())
my_range.print()
G = igraph.Graph(directed=True)
global_assigned_patents = dict()
while my_range.range_end <= my_range.max_date:
    (range_patents, range_train_patents, range_test_patents,
     range_uspatentcitations) = utils.find_range_dataframes(
        my_range, patents, uspatentcitations)
    G = utils.add_edges(G, range_uspatentcitations)
    print("finding components")
    connected_components = leidenalg.find_partition(
        G, leidenalg.ModularityVertexPartition)
    subgraphs = connected_components.subgraphs()
    num_subgraphs = len(subgraphs)
    range_assigned_patents = utils.igraph_classify_train_test_graph(
        subgraphs, num_subgraphs, range_patents, range_train_patents,
        range_test_patents)
    for key in range_assigned_patents.keys():
        global_assigned_patents[key] = range_assigned_patents[key]
    my_range.increase_proportionally(delta, train_percentage)
    my_range.print()
    G = igraph.Graph(directed=True)  # restore the initial graph

import itertools as it

import networkx as nx
import pandas as pd

import utils as u

# Create empty graph
graph = nx.Graph()

# Load edge and node lists
edgelist = pd.read_csv('edgelist_wmata.csv')
nodelist = pd.read_csv('nodelist_wmata.csv')

# Add edges, nodes, and their attributes
graph = u.add_edges(graph, edgelist)
graph = u.add_nodes(graph, nodelist)

# Find nodes of odd degree and odd node pairs
odd_degree_nodes = u.find_odd_degree_nodes(graph)
odd_node_pairs = list(it.combinations(odd_degree_nodes, 2))

# Compute shortest distance between each pair of odd-degree nodes
distances = u.find_shortest_distances(odd_node_pairs, graph)

# Create complete graph on the odd-degree nodes
graph_complete = u.build_complete_graph(distances)

# Compute minimum-weight matching, removing duplicates
matches = u.compute_min_weight_matches(graph_complete)

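The `u.add_edges` and `u.add_nodes` helpers are not shown here. A minimal sketch of what they might look like, assuming the CSVs follow a `node1,node2,...` edge layout and an `id,...` node layout (these column names are assumptions):

import networkx as nx


def add_edges(graph, edgelist):
    """Add one edge per DataFrame row; remaining columns become edge attributes.

    Sketch only: assumes 'node1' and 'node2' columns (hypothetical names).
    """
    for row in edgelist.itertuples(index=False):
        attrs = row._asdict()
        u, v = attrs.pop('node1'), attrs.pop('node2')
        graph.add_edge(u, v, **attrs)
    return graph


def add_nodes(graph, nodelist):
    """Add one node per DataFrame row; remaining columns become node attributes.

    Sketch only: assumes an 'id' column (hypothetical name).
    """
    for row in nodelist.itertuples(index=False):
        attrs = row._asdict()
        graph.add_node(attrs.pop('id'), **attrs)
    return graph
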
from graph import Graph

import utils

if __name__ == "__main__":
    v, adjacency_matrix = utils.read_from_file("matrix")
    edges = utils.get_edges(adjacency_matrix)

    g = Graph(v)
    utils.add_edges(g, edges)
    g.dfs(2)
    print("-----------------------------")

    g1 = Graph(v)
    utils.add_edges(g1, edges)
    g1.bfs(4)
    print("-----------------------------")

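Neither `utils.get_edges` nor this variant of `utils.add_edges` is defined in the snippet. One plausible reading, sketched below, turns an adjacency matrix into an edge list (treating any non-zero entry as an edge) and feeds it to the graph; the `g.add_edge(u, v)` method is an assumption about the custom `Graph` class:

def get_edges(adjacency_matrix):
    """Collect (u, v) pairs for every non-zero entry of the matrix.

    Sketch only: the real helper may deduplicate undirected edges differently.
    """
    edges = []
    for u, row in enumerate(adjacency_matrix):
        for v, weight in enumerate(row):
            if weight:
                edges.append((u, v))
    return edges


def add_edges(g, edges):
    """Feed the edge list into the graph; assumes a g.add_edge(u, v) method."""
    for u, v in edges:
        g.add_edge(u, v)
    return g
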
def crazy_visual():
    dataset = NYUSegmentation()
    # load training data
    data = load_nyu(n_sp=500)
    data = add_edges(data)

    for x, image_name, superpixels, y in zip(data.X, data.file_names,
                                             data.superpixels, data.Y):
        print(image_name)
        if int(image_name) != 11:
            continue
        image = dataset.get_image(image_name)
        plt.figure(figsize=(20, 20))
        boundary_image = mark_boundaries(image, superpixels)
        plt.imshow(boundary_image)
        gridx, gridy = np.mgrid[:superpixels.shape[0], :superpixels.shape[1]]

        edges = x[1]
        points_normals = dataset.get_pointcloud_normals(image_name)
        centers2d = get_superpixel_centers(superpixels)
        centers3d = [np.bincount(superpixels.ravel(), weights=c.ravel())
                     for c in points_normals[:, :, :3].reshape(-1, 3).T]
        centers3d = (np.vstack(centers3d) / np.bincount(superpixels.ravel())).T
        sp_normals = get_sp_normals(points_normals[:, :, 3:], superpixels)
        offset = centers3d[edges[:, 0]] - centers3d[edges[:, 1]]
        offset = offset / np.sqrt(np.sum(offset ** 2, axis=1))[:, np.newaxis]
        # mean_normal = (sp_normals[edges[:, 0]] + sp_normals[edges[:, 1]]) / 2.
        mean_normal = sp_normals[edges[:, 0]]
        # edge_features = np.arccos(np.abs((offset * mean_normal).sum(axis=1))) * 2. / np.pi
        edge_features = 1 - np.abs((offset * mean_normal).sum(axis=1))
        no_normals = (np.all(sp_normals[edges[:, 0]] == 0, axis=1)
                      + np.all(sp_normals[edges[:, 1]] == 0, axis=1))
        edge_features[no_normals] = 0  # nan normals

        if True:
            coords = points_normals[:, :, :3].reshape(-1, 3)
            perm = np.random.permutation(superpixels.max() + 1)
            # mv is a 3-D plotting handle (e.g. mayavi.mlab), imported elsewhere
            mv.points3d(coords[:, 0], coords[:, 1], coords[:, 2],
                        perm[superpixels.ravel()], mode='point')
            # mv.points3d(centers3d[:, 0], centers3d[:, 1], centers3d[:, 2],
            #             scale_factor=.04)
            mv.quiver3d(centers3d[:, 0], centers3d[:, 1], centers3d[:, 2],
                        sp_normals[:, 0], sp_normals[:, 1], sp_normals[:, 2])
            mv.show()
            from IPython.core.debugger import Tracer
            Tracer()()

        for i, edge in enumerate(edges):
            e0, e1 = edge
            # color = (dataset.colors[y[e0]] + dataset.colors[y[e1]]) / (2. * 255.)
            # f = edge_features[i]
            # if f < 0:
            #     e0, e1 = e1, e0
            #     f = -f
            # plt.arrow(centers[e0][0], centers[e0][1],
            #           centers[e1][0] - centers[e0][0],
            #           centers[e1][1] - centers[e0][1], width=f * 5)
            color = "black"
            plt.plot([centers2d[e0][0], centers2d[e1][0]],
                     [centers2d[e0][1], centers2d[e1][1]],
                     c=color, linewidth=edge_features[i] * 5)
        plt.scatter(centers2d[:, 0], centers2d[:, 1], s=100)
        plt.tight_layout()
        plt.xlim(0, superpixels.shape[1])
        plt.ylim(superpixels.shape[0], 0)
        plt.axis("off")
        plt.savefig("figures/normal_relative/%s.png" % image_name,
                    bbox_inches="tight")
        plt.close()

def main():
    argv = sys.argv
    print("loading %s ..." % argv[1])
    ssvm = SaveLogger(file_name=argv[1]).load()
    if hasattr(ssvm, 'problem'):
        ssvm.model = ssvm.problem
    print(ssvm)
    if hasattr(ssvm, 'base_ssvm'):
        ssvm = ssvm.base_ssvm
    print("Iterations: %d" % len(ssvm.objective_curve_))
    print("Objective: %f" % ssvm.objective_curve_[-1])
    inference_run = None
    if hasattr(ssvm, 'cached_constraint_'):
        inference_run = ~np.array(ssvm.cached_constraint_)
        print("Gap: %f" %
              (np.array(ssvm.primal_objective_curve_)[inference_run][-1] -
               ssvm.objective_curve_[-1]))

    if len(argv) <= 2:
        argv.append("acc")
    if len(argv) <= 3:
        dataset = 'nyu'
    else:
        dataset = argv[3]

    if argv[2] == 'acc':
        ssvm.n_jobs = 1
        for data_str, title in zip(["train", "val"],
                                   ["TRAINING SET", "VALIDATION SET"]):
            print(title)
            edge_type = "pairwise"
            if dataset == 'msrc':
                ds = MSRC21Dataset()
                data = msrc_helpers.load_data(data_str, which="piecewise_new")
                # data = add_kraehenbuehl_features(data, which="train_30px")
                data = msrc_helpers.add_kraehenbuehl_features(data,
                                                              which="train")
            elif dataset == 'pascal':
                ds = PascalSegmentation()
                data = pascal_helpers.load_pascal(data_str, sp_type="cpmc")
                # data = pascal_helpers.load_pascal(data_str)
            elif dataset == 'nyu':
                ds = NYUSegmentation()
                data = nyu_helpers.load_nyu(data_str, n_sp=500, sp='rgbd')
            else:
                raise ValueError("Expected dataset to be 'nyu', 'pascal' or"
                                 " 'msrc', got %s." % dataset)

            if type(ssvm.model).__name__ == "LatentNodeCRF":
                print("making data hierarchical")
                data = pascal_helpers.make_cpmc_hierarchy(ds, data)
                # data = make_hierarchical_data(
                #     ds, data, lateral=True, latent=True,
                #     latent_lateral=False, add_edge_features=False)
            else:
                data = add_edges(data, edge_type)

            if type(ssvm.model).__name__ == 'EdgeFeatureGraphCRF':
                data = add_edge_features(ds, data, depth_diff=True,
                                         normal_angles=True)

            if type(ssvm.model).__name__ == "EdgeFeatureLatentNodeCRF":
                data = add_edge_features(ds, data)
                data = make_hierarchical_data(
                    ds, data, lateral=True, latent=True, latent_lateral=False,
                    add_edge_features=True)
            # ssvm.model.inference_method = "qpbo"

            Y_pred = ssvm.predict(data.X)

            if isinstance(ssvm.model, LatentNodeCRF):
                Y_pred = [ssvm.model.label_from_latent(h) for h in Y_pred]
            Y_flat = np.hstack(data.Y)
            print("superpixel accuracy: %.2f" %
                  (np.mean((np.hstack(Y_pred) == Y_flat)
                           [Y_flat != ds.void_label]) * 100))
            if dataset == 'msrc':
                res = msrc_helpers.eval_on_pixels(data, Y_pred,
                                                  print_results=True)
                print("global: %.2f, average: %.2f" %
                      (res['global'] * 100, res['average'] * 100))
                # msrc_helpers.plot_confusion_matrix(res['confusion'])
            else:
                hamming, jaccard = eval_on_sp(ds, data, Y_pred,
                                              print_results=True)
                print("Jaccard: %.2f, Hamming: %.2f" % (jaccard.mean(),
                                                        hamming.mean()))
        plt.show()

    elif argv[2] == 'plot':
        data_str = 'val'
        if len(argv) <= 4:
            raise ValueError("Need a folder name for plotting.")
        if dataset == "msrc":
            ds = MSRC21Dataset()
            data = msrc_helpers.load_data(data_str, which="piecewise")
            data = add_edges(data, independent=False)
            data = msrc_helpers.add_kraehenbuehl_features(data,
                                                          which="train_30px")
            data = msrc_helpers.add_kraehenbuehl_features(data, which="train")
        elif dataset == "pascal":
            ds = PascalSegmentation()
            data = pascal_helpers.load_pascal("val")
            data = add_edges(data)
        elif dataset == "nyu":
            ds = NYUSegmentation()
            data = nyu_helpers.load_nyu("test")
            data = add_edges(data)

        if type(ssvm.model).__name__ == 'EdgeFeatureGraphCRF':
            data = add_edge_features(ds, data, depth_diff=True,
                                     normal_angles=True)
        Y_pred = ssvm.predict(data.X)
        plot_results(ds, data, Y_pred, argv[4])

import igraph
import leidenalg
import pandas as pd

import utils

patents = utils.read_patents()
patent_classification = utils.read_patent_classification()
uspatentcitations = utils.read_uspatentcitation()
patents = utils.merge_patents_and_classification(patents,
                                                 patent_classification)
patents = patents.set_index("id")

print("read graph")
g = igraph.Graph()
g = utils.add_edges(g, uspatentcitations)
# g = g.as_undirected()

print("read components")
connected_components = leidenalg.find_partition(
    g, leidenalg.ModularityVertexPartition)

print("find subgraphs")
subgraphs = connected_components.subgraphs()
num_subgraphs = len(subgraphs)

print("start to classify")
range_assigned_patents = utils.igraph_classify_whole_graph(
    subgraphs, num_subgraphs, patents)

# data frame test
forecasted_patents = pd.DataFrame.from_dict(
    range_assigned_patents, orient='index',
    columns=['number', 'section_id', 'forecast_section_id'])
utils.write_to_csv(forecasted_patents, 'forecasted_patents')

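Here `utils.add_edges` loads a citation DataFrame into an igraph graph, but its body is not shown. A minimal sketch under assumed column names (`patent_id`, `citation_id` are hypothetical); vertices are registered by name first because igraph edges must reference existing vertices:

import igraph
import pandas as pd


def add_edges(g, citations):
    """Load citation pairs into an igraph.Graph.

    Sketch only: assumes the DataFrame has 'patent_id' and 'citation_id'
    columns; the real helper may also attach edge attributes.
    """
    # collect every patent number that appears on either side of a citation
    names = pd.unique(citations[['patent_id', 'citation_id']].values.ravel())
    g.add_vertices(list(names))
    # igraph resolves string endpoints against the vertices' 'name' attribute
    g.add_edges(list(zip(citations['patent_id'], citations['citation_id'])))
    return g
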
def main():
    argv = sys.argv
    print("loading %s ..." % argv[1])
    ssvm1 = SaveLogger(file_name=argv[1]).load()
    ssvm2 = SaveLogger(file_name=argv[2]).load()

    data_str = 'val'
    if len(argv) <= 3:
        raise ValueError("Need a folder name for plotting.")
    print("loading data...")
    data = load_nyu(data_str, n_sp=500)
    dataset = NYUSegmentation()
    print("done")

    data1 = add_edges(data, kind="pairwise")
    data2 = add_edges(data, kind="pairwise")
    data1 = add_edge_features(dataset, data1)
    data2 = add_edge_features(dataset, data2, depth_diff=True)
    Y_pred1 = ssvm1.predict(data1.X)
    Y_pred2 = ssvm2.predict(data2.X)
    folder = argv[3]

    if not os.path.exists(folder):
        os.mkdir(folder)

    np.random.seed(0)
    for image_name, superpixels, y_pred1, y_pred2 in zip(
            data.file_names, data.superpixels, Y_pred1, Y_pred2):
        if np.all(y_pred1 == y_pred2):
            continue
        gt = dataset.get_ground_truth(image_name)
        perf1 = eval_on_pixels(dataset, [gt], [y_pred1[superpixels]],
                               print_results=False)[0]
        perf1 = np.mean(perf1[np.isfinite(perf1)])
        perf2 = eval_on_pixels(dataset, [gt], [y_pred2[superpixels]],
                               print_results=False)[0]
        perf2 = np.mean(perf2[np.isfinite(perf2)])
        if np.abs(perf1 - perf2) < 2:
            continue

        image = dataset.get_image(image_name)
        fig, axes = plt.subplots(2, 3, figsize=(12, 6))
        axes[0, 0].imshow(image)
        axes[0, 0].imshow((y_pred1 != y_pred2)[superpixels], vmin=0, vmax=1,
                          alpha=.7)
        axes[0, 1].set_title("ground truth")
        axes[0, 1].imshow(image)
        axes[0, 1].imshow(gt, alpha=.7, cmap=dataset.cmap, vmin=0,
                          vmax=dataset.cmap.N)
        axes[1, 0].set_title("%.2f" % perf1)
        axes[1, 0].imshow(image)
        axes[1, 0].imshow(y_pred1[superpixels], vmin=0, vmax=dataset.cmap.N,
                          alpha=.7, cmap=dataset.cmap)
        axes[1, 1].set_title("%.2f" % perf2)
        axes[1, 1].imshow(image)
        axes[1, 1].imshow(y_pred2[superpixels], alpha=.7, cmap=dataset.cmap,
                          vmin=0, vmax=dataset.cmap.N)

        present_y = np.unique(np.hstack([y_pred1, y_pred2, np.unique(gt)]))
        present_y = np.array([y_ for y_ in present_y
                              if y_ != dataset.void_label])
        axes[0, 2].imshow(present_y[:, np.newaxis], interpolation='nearest',
                          cmap=dataset.cmap, vmin=0, vmax=dataset.cmap.N)
        for i, c in enumerate(present_y):
            axes[0, 2].text(1, i, dataset.classes[c])
        for ax in axes.ravel():
            ax.set_xticks(())
            ax.set_yticks(())
        axes[1, 2].set_visible(False)
        fig.savefig(folder + "/%s.png" % image_name, bbox_inches="tight")
        plt.close(fig)
