def test_difference(self):
    """Export the difference of the two fixture graphs and compare it to the reference.

    The difference graph is written as a tab-separated adjacency matrix with
    a header, and its MD5 checksum must equal that of the stored expected file.
    """
    sys.stdout.write("Testing set difference\n")

    # Temporary export target and the reference matrix it is compared against.
    tmp_matrix = os.path.join(
        current_dir, 'pyntacletests/test_sets/tmp/result_set.adjm')
    reference_matrix = os.path.join(
        current_dir,
        'pyntacletests/test_sets/output/set/result_difference.adjm')

    difference_graph = GraphSetOps.difference(
        self.graph1, self.graph2, new_graph_name='result_set')
    PyntacleExporter.AdjacencyMatrix(
        graph=difference_graph, file=tmp_matrix, sep='\t', header=True)

    self.assertEqual(
        getmd5(tmp_matrix),
        getmd5(reference_matrix),
        'Wrong checksum for Set, difference case')
def run(self):
    """Run the ``pyntacle set`` command selected by ``self.args.which``.

    Steps, in order:

    1. Validate the two input files (both must exist and must differ).
    2. Load both graphs, optionally reducing each to its largest component.
    3. Perform the requested operation (``union``, ``intersection`` or
       ``difference``) through ``GraphSetOps``.
    4. Print a summary, export the resulting graph, optionally draw the
       input and output graphs, and write the report.

    The method always terminates the process through ``sys.exit``: status 1
    on invalid input, status 0 on success or when the operation yields
    nothing worth exporting.
    """
    cursor = None
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    def stop_cursor():
        # Stop the animation (if one was started) before a regular exit.
        if cursor is not None:
            cursor.stop()

    operation = self.args.which

    # ---- input validation -------------------------------------------------
    if not os.path.exists(self.args.input_file_1) or not os.path.exists(
            self.args.input_file_2):
        sys.stderr.write(
            u"One of the two input files does not exist. Quitting\n")
        sys.exit(1)

    if filecmp.cmp(self.args.input_file_1, self.args.input_file_2,
                   shallow=False):
        sys.stderr.write(u"The two input files are equal. Quitting\n")
        sys.exit(1)

    input_header = not self.args.no_header

    # ---- graph import -----------------------------------------------------
    sys.stdout.write(import_start)
    input_format = format_dictionary.get(self.args.format, "NA")

    sys.stdout.write(u"Reading first input file\n")
    graph1 = GraphLoad(self.args.input_file_1,
                       file_format=input_format,
                       header=input_header,
                       separator=self.args.input_separator).graph_load()

    sys.stdout.write(u"Reading second input file\n")
    graph2 = GraphLoad(self.args.input_file_2,
                       file_format=input_format,
                       header=input_header,
                       separator=self.args.input_separator).graph_load()

    # init Utils global stuff
    utils1 = GraphUtils(graph=graph1)
    utils2 = GraphUtils(graph=graph2)

    # Extension-less basenames of the inputs, reused for default file names.
    stem1 = os.path.splitext(os.path.basename(self.args.input_file_1))[0]
    stem2 = os.path.splitext(os.path.basename(self.args.input_file_2))[0]

    if self.args.output_file is None and operation in (
            "union", "intersection", "difference"):
        # Default basename: <input1>_<OPERATION>_<input2>_<date>
        self.args.output_file = "_".join(
            [stem1, operation.upper(), stem2, self.date])

    # ---- optional reduction to the largest component ----------------------
    if self.args.largest_component:
        try:
            graph1 = utils1.get_largest_component()
            # Report graph1's own name (the original printed graph2's here).
            sys.stdout.write(
                u"Taking the largest component of the input graph {0} as you requested ({1} nodes, {2} edges)\n"
                .format(graph1["name"], graph1.vcount(), graph1.ecount()))
            utils1.set_graph(graph1)
        except MultipleSolutionsError:
            sys.stderr.write(
                u"Graph {} has two largest components of the same size. Cannot choose one. either remove one of the components or run 'pyntacle set' without the '--largest-component' option. Quitting\n"
                .format(graph1["name"]))
            sys.exit(1)

        try:
            graph2 = utils2.get_largest_component()
            sys.stdout.write(
                u"Taking the largest component of the input graph {0} as you requested ({1} nodes, {2} edges)\n"
                .format(graph2["name"], graph2.vcount(), graph2.ecount()))
            utils2.set_graph(graph2)
        except MultipleSolutionsError:
            sys.stderr.write(
                u"Graph {} has two largest components of the same size. Cannot choose one. either remove one of the components or run 'pyntacle set' without the '--largest-component' option. Quitting\n"
                .format(graph2["name"]))
            sys.exit(1)

    # ---- plot dimensions --------------------------------------------------
    # Check provided dimensions' format
    if self.args.plot_dim:
        # Custom size: comma-separated list of strictly positive integers.
        dimensions = []
        for token in self.args.plot_dim.split(","):
            try:
                value = int(token)
            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)
            if value <= 0:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)
            dimensions.append(value)
        self.args.plot_dim = dimensions
        plot_size = tuple(dimensions)
    else:
        # Pick a canvas according to the size of the input graphs.
        if graph1.vcount() <= 150 and graph2.vcount() <= 150:
            plot_size = (800, 800)
        else:
            plot_size = (1600, 1600)

    if self.args.format == "sif" or all(
            x is None for x in graph1.es()["sif_interaction"]) or all(
                x is None for x in graph2.es()["sif_interaction"]):
        sys.stdout.write(
            u"WARNING: Interaction stored in SIF files will be removed\n")

    # ---- set operation ----------------------------------------------------
    sys.stdout.write(section_end)
    sys.stdout.write(run_start)

    if operation == "union":
        sys.stdout.write(
            u" Performing union between input graph {} and {}\n".format(
                self.args.input_file_1, self.args.input_file_2))
        output_graph = GraphSetOps.union(graph1, graph2,
                                         self.args.output_file)
        if all(len(x) <= 2 for x in output_graph.vs()["parent"]):
            sys.stdout.write(
                u"There were no common nodes when performing Graph union. Will return two disjoint graphs\n"
            )

    elif operation == "intersection":
        sys.stdout.write(
            u"Performing intersection between input graph {} and {}\n".
            format(self.args.input_file_1, self.args.input_file_2))
        output_graph = GraphSetOps.intersection(graph1, graph2,
                                                self.args.output_file)
        if output_graph.ecount() == 0:
            sys.stdout.write(
                u"No intersection was possible for the two input graphs. No output will be generated\n"
            )
            stop_cursor()
            sys.exit(0)

    elif operation == "difference":
        sys.stdout.write(
            "Performing difference between input graph {} and {}\n".format(
                self.args.input_file_1, self.args.input_file_2))
        output_graph = GraphSetOps.difference(graph1, graph2,
                                              self.args.output_file)

        if (output_graph.vcount() == graph1.vcount()
                and output_graph.ecount() == graph1.ecount()):
            # The result equals graph 1, i.e. nothing belonging to graph 2
            # was removed from graph 1: name graph 2 first, graph 1 second.
            sys.stdout.write(
                u"Nothing of graph {} could be subtracted from graph {}\n".
                format(os.path.basename(self.args.input_file_2),
                       os.path.basename(self.args.input_file_1)))

        if output_graph.vcount() == 0 and output_graph.ecount() == 0:
            sys.stdout.write(
                u"Graph difference was complete, no nodes and edges could be retrieved. No output will be produced. Quitting\n"
            )
            stop_cursor()
            sys.exit(0)

        if output_graph.vcount() <= 1 and output_graph.ecount() < 1:
            sys.stdout.write(
                u"Graph difference returned only node {} and no edge. No output will be produced. Quitting\n"
                .format("".join(output_graph.vs["name"])))
            stop_cursor()
            sys.exit(0)

        if output_graph.vcount() > 1 and output_graph.ecount() == 0:
            sys.stdout.write(
                u"Graph difference returned {} nodes, namely: {} and no edge. No output will be produced. Quitting\n"
                .format(output_graph.vcount(),
                        ",\n".join(output_graph.vs()["name"])))
            stop_cursor()
            sys.exit(0)

    # ---- console summary --------------------------------------------------
    sys.stdout.write(section_end)
    sys.stdout.write(report_start)
    sys.stdout.write(u"Report of set operation: {}\n".format(operation))
    sys.stdout.write(section_end)
    sys.stdout.write(u"Input graphs:\n")
    sys.stdout.write(
        u"Graph 1: {0}\nNodes:\t{1}\nEdges:\t{2}\nComponents:\t{3}\n".format(
            graph1["name"][0], graph1.vcount(), graph1.ecount(),
            len(graph1.components())))
    sys.stdout.write(section_end)
    sys.stdout.write(
        u"Graph 2: {0}\nNodes:\t{1}\nEdges:\t{2}\nComponents:\t{3}\n".format(
            graph2["name"][0], graph2.vcount(), graph2.ecount(),
            len(graph2.components())))
    sys.stdout.write(section_end)
    sys.stdout.write(u"Resulting graph:\n")
    sys.stdout.write(
        u"Nodes:\t{0}\nEdges:\t{1}\nComponents:\t{2}\n".format(
            output_graph.vcount(), output_graph.ecount(),
            len(output_graph.components())))
    sys.stdout.write(section_end)
    sys.stdout.write(report_start)

    # ---- output file ------------------------------------------------------
    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: Output directory does not exist, will create one at {}\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    out_form = format_dictionary.get(self.args.output_format, "NA")
    output_path = os.path.join(self.args.directory,
                               ".".join([self.args.output_file, out_form]))
    sys.stdout.write(u"Basename of output graph: {}\n".format(
        self.args.output_file))
    sys.stdout.write(
        u"Path to generated graph is: {}\n".format(output_path))

    if self.args.no_output_header:
        sys.stdout.write(u"Skipping header on output files\n")
        output_header = False
    else:
        output_header = True

    if self.args.output_separator is None:
        sys.stdout.write(
            u"Using '\\t' as default separator for output file\n")
        self.args.output_separator = "\t"

    if os.path.exists(output_path):
        self.logging.warning(
            u"A file named {} already exist, will be overwritten".format(
                output_path))

    # output generated networks
    if out_form == "adjm":
        sys.stdout.write(
            u"Writing resulting graph to an adjacency matrix\n")
        PyntacleExporter.AdjacencyMatrix(output_graph,
                                         output_path,
                                         sep=self.args.output_separator,
                                         header=output_header)
    elif out_form == "egl":
        sys.stdout.write(u"Writing resulting graph to an edge list\n")
        PyntacleExporter.EdgeList(output_graph,
                                  output_path,
                                  sep=self.args.output_separator,
                                  header=output_header)
    elif out_form == "sif":
        sys.stdout.write(
            u"Writing resulting graph to Simple Interaction Format (SIF) file\n"
        )
        PyntacleExporter.Sif(output_graph,
                             output_path,
                             sep=self.args.output_separator,
                             header=output_header)
    elif out_form == "dot":
        sys.stdout.write("Writing resulting graph to a DOT file\n")
        # Ignore ugly RuntimeWarnings while creating a dot
        simplefilter("ignore", RuntimeWarning)
        PyntacleExporter.Dot(output_graph, output_path)
    elif out_form == "graph":
        sys.stdout.write(
            "Writing resulting graph into a binary file (ending in .graph)\n"
        )
        PyntacleExporter.Binary(output_graph, output_path)

    # ---- plots ------------------------------------------------------------
    # Names under which the input graphs appear in the "parent" attribute
    # of the vertices of the resulting graph.
    parent_g1 = graph1["name"][0]
    parent_g2 = graph2["name"][0]

    # The size limit has to be part of the first condition, otherwise the
    # "exceeds Pyntacle limits" branch below can never be reached.
    graphs_plottable = graph1.vcount() < 1000 and graph2.vcount() < 1000

    if not self.args.no_plot and graphs_plottable:
        # generates plot directory
        plot_dir = os.path.join(self.args.directory, "pyntacle-plots")
        if os.path.isdir(plot_dir):
            self.logging.warning(
                u"A directory named 'pyntacle-plots' already exists.")
        else:
            os.mkdir(plot_dir)

        sys.stdout.write(u"Generating plots in {} format\n".format(
            self.args.plot_format))
        sys.stdout.write(u"Drawing starting graphs\n")

        graph1_plot_path = os.path.join(
            plot_dir,
            ".".join(["_".join([stem1, self.date]), self.args.plot_format]))
        graph2_plot_path = os.path.join(
            plot_dir,
            ".".join(["_".join([stem2, self.date]), self.args.plot_format]))

        graph1_plotter = PlotGraph(graph=graph1)
        graph2_plotter = PlotGraph(graph=graph2)

        # first create two plots of the input graphs
        input_graph_node_size = 25
        pal = sns.color_palette("hls", 10).as_hex()
        framepal = sns.color_palette("hls", 10, desat=0.5).as_hex()
        graph_1_colour = pal[0]
        graph_1_frame = framepal[0]
        graph_2_colour = pal[3]
        graph_2_frame = framepal[3]

        # node labels, colours and sizes of the input graphs
        graph1_plotter.set_node_labels(labels=graph1.vs()["name"])
        graph2_plotter.set_node_labels(labels=graph2.vs()["name"])
        graph1_plotter.set_node_colors(colors=[graph_1_colour] *
                                       graph1.vcount())
        graph2_plotter.set_node_colors(colors=[graph_2_colour] *
                                       graph2.vcount())
        graph1_plotter.set_node_sizes(sizes=[input_graph_node_size] *
                                      graph1.vcount())
        graph2_plotter.set_node_sizes(sizes=[input_graph_node_size] *
                                      graph2.vcount())

        # One frame colour per vertex of the graph being drawn (graph 2's
        # list was previously sized with graph 1's vertex count).
        graph_1_frame_colors = [graph_1_frame] * graph1.vcount()
        graph_2_frame_colors = [graph_2_frame] * graph2.vcount()

        # define layouts
        graph1_plotter.set_layouts(self.args.plot_layout)
        graph2_plotter.set_layouts(self.args.plot_layout)

        # plot input graphs
        graph1_plotter.plot_graph(path=graph1_plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=graph_1_frame_colors)
        graph2_plotter.plot_graph(path=graph2_plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=graph_2_frame_colors)

        if output_graph.vcount() > 0:
            # plot output graph
            output_plot_path = os.path.join(
                plot_dir,
                ".".join([
                    "_".join([operation, self.args.output_file, self.date]),
                    self.args.plot_format
                ]))
            output_graph_plotter = PlotGraph(graph=output_graph)

            sys.stdout.write(u"Drawing resulting graphs\n")
            node_intersection_colour = pal[1]
            node_intersection_frame = framepal[1]
            node_intersection_size = 45

            # Colour every node by the input graph(s) it comes from.
            intersection_node_color_list = []
            intersection_frame_color_list = []
            for v in output_graph.vs():
                in_g1 = parent_g1 in v["parent"]
                in_g2 = parent_g2 in v["parent"]
                if in_g1 and in_g2:
                    intersection_node_color_list.append(
                        node_intersection_colour)
                    intersection_frame_color_list.append(
                        node_intersection_frame)
                elif in_g1:
                    intersection_node_color_list.append(graph_1_colour)
                    intersection_frame_color_list.append(graph_1_frame)
                elif in_g2:
                    intersection_node_color_list.append(graph_2_colour)
                    intersection_frame_color_list.append(graph_2_frame)

            output_graph_plotter.set_node_colors(
                colors=intersection_node_color_list)
            # Nodes shared by both inputs are drawn larger.
            output_graph_plotter.set_node_sizes(sizes=[
                node_intersection_size if parent_g1 in v["parent"]
                and parent_g2 in v["parent"] else input_graph_node_size
                for v in output_graph.vs()
            ])
            output_graph_plotter.set_node_labels(
                labels=output_graph.vs()["name"])
            output_graph_plotter.set_layouts(self.args.plot_layout)
            output_graph_plotter.plot_graph(
                path=output_plot_path,
                bbox=plot_size,
                margin=20,
                edge_curved=0.2,
                keep_aspect_ratio=True,
                vertex_label_size=6,
                vertex_frame_color=intersection_frame_color_list)
        else:
            sys.stdout.write(
                u"The output graph does not contain vertices. Can't draw graph\n"
            )

    elif not self.args.no_plot:
        sys.stdout.write(
            u"One of the two input graphs exceeds Pyntacle limits for plotting (maximum 1000 nodes). Will not draw graph\n"
        )

    # ---- report -----------------------------------------------------------
    reporter1 = PyntacleReporter(graph=graph1)
    reporter2 = PyntacleReporter(graph=graph2)
    reporter_final = PyntacleReporter(graph=output_graph)

    set1_attr_dict = OrderedDict()
    set2_attr_dict = OrderedDict()
    setF_attr_dict = OrderedDict()

    if operation == 'intersection':
        # Computed here rather than in the plotting branch so the report
        # also works when plotting is disabled or skipped.
        intersection_set = [
            v["name"] for v in output_graph.vs()
            if parent_g1 in v["parent"] and parent_g2 in v["parent"]
        ]
        setF_attr_dict['\nCommon Nodes'] = 'Node names'
        setF_attr_dict[len(intersection_set)] = ','.join(intersection_set)

    reporter1.create_report(ReportEnum.Set, set1_attr_dict)
    reporter2.create_report(ReportEnum.Set, set2_attr_dict)
    reporter_final.create_report(ReportEnum.Set, setF_attr_dict)

    # Merge the three reports into one: replace/drop selected rows of each
    # report, then append the second and the final report to the first.
    reporter1.report[1] = ['\n--- Graph 1 ---']
    reporter2.report[1] = ['--- Graph 2 ---']
    del (reporter1.report[-1])
    del (reporter2.report[-1])
    del (reporter2.report[0])
    del (reporter_final.report[0])
    for e in reporter_final.report:
        if e[0] == 'Pyntacle Command:':
            e[1] = e[1] + ' ' + operation
    reporter_final.report[0] = ['\n--- Resulting Graph ---']

    reporter1.report.extend(reporter2.report)
    reporter1.report.extend(reporter_final.report)
    reporter1.write_report(report_dir=self.args.directory,
                           format=self.args.report_format)

    stop_cursor()
    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle set completed successfully\n")
    sys.exit(0)
def run(self):
    """Run the ``pyntacle generate`` command selected by ``self.args.which``.

    Builds a graph with the requested topology (``random``, ``scale-free``,
    ``tree`` or ``small-world``) through ``PyntacleGenerator``, writes it to
    ``self.args.directory`` in the requested output format and, unless
    plotting is disabled or the graph has 1000 nodes or more, draws it.

    Parameters that were not supplied are drawn at random. Invalid
    parameters terminate the process with status 1. On success the process
    exits with status 0 only when ``self.args.repeat == 1``; otherwise the
    method returns normally.
    """
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    sys.stdout.write(run_start)
    topology = self.args.which

    # ---- number of nodes (shared by every topology but small-world) --------
    if topology in ("random", "scale-free", "tree"):
        if self.args.nodes is None:
            self.args.nodes = random.randint(100, 500)
        else:
            try:
                self.args.nodes = int(self.args.nodes)
            except ValueError:
                sys.stderr.write(
                    u"Number of nodes must be a positive integer. Quitting\n"
                )
                sys.exit(1)

    # ---- graph generation -------------------------------------------------
    if topology == "random":
        if not self.args.probability and self.args.edges:
            # Fixed number of edges.
            try:
                self.args.edges = int(self.args.edges)
                # This banner used to be a bare expression and was never
                # actually printed.
                sys.stdout.write(
                    u"Generating graph with random topology\nParameters:\nNumber of nodes: {0}\nNumber of edges: {1}\n"
                    .format(self.args.nodes, self.args.edges))
                graph = PyntacleGenerator.Random(
                    [self.args.nodes, self.args.edges],
                    name="Random",
                    seed=self.args.seed)
            except (ValueError, TypeError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"Number of nodes must be a positive integer greater than 2 and number of edges must be a positive integer greater than zero. Quitting\n"
                )
                sys.exit(1)
        else:
            # Wiring probability, defaulting to 0.5.
            if not self.args.probability:
                self.args.probability = 0.5
            else:
                try:
                    self.args.probability = float(self.args.probability)
                    if self.args.probability > 1.0 or self.args.probability < 0.0:
                        raise ValueError
                except ValueError:
                    sys.stderr.write(
                        u"Probability must be a float between 0 and 1. Quitting\n"
                    )
                    sys.exit(1)
            try:
                # The "u" prefix used to sit inside the literal, so the
                # banner was printed as "uGenerating ...".
                sys.stdout.write(
                    u"Generating graph with random topology\nParameters:\nNumber of nodes: {0}\nProbability of wiring: {1}\n"
                    .format(self.args.nodes, self.args.probability))
                graph = PyntacleGenerator.Random(
                    [self.args.nodes, self.args.probability],
                    name="Random",
                    seed=self.args.seed)
            except (ValueError, TypeError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"Number of nodes must be a positive integer greater than 2 and a probability must be a float between 0 and 1. Quitting\n"
                )
                sys.exit(1)

    elif topology == "scale-free":
        if self.args.avg_edges is None:
            self.args.avg_edges = random.randint(10, 100)
        else:
            try:
                # Convert the option itself (it used to be overwritten with
                # the number of nodes).
                self.args.avg_edges = int(self.args.avg_edges)
            except ValueError:
                sys.stderr.write(
                    u"Number of outgoing edges must be a positive integer. Quitting\n"
                )
                sys.exit(1)
        try:
            sys.stdout.write(
                u"Generating graph with scale-free topology\nParameters:\nNumber of Nodes: {0}\nNumber of Outgoing edges: {1}\n"
                .format(self.args.nodes, self.args.avg_edges))
            graph = PyntacleGenerator.ScaleFree(
                [self.args.nodes, self.args.avg_edges],
                name="ScaleFree",
                seed=self.args.seed)
        except (ValueError, TypeError, IllegalGraphSizeError):
            sys.stderr.write(
                u"Number of nodes and number of outgoing edges must be positive integers. Quitting\n"
            )
            sys.exit(1)

    elif topology == "tree":
        if self.args.children is None:
            self.args.children = random.randint(2, 10)
        else:
            try:
                # Convert the option itself (it used to be overwritten with
                # the number of nodes).
                self.args.children = int(self.args.children)
            except ValueError:
                sys.stderr.write(
                    u"Number of children must be a positive integer. Quitting\n"
                )
                sys.exit(1)
        try:
            sys.stdout.write(
                u"Generating Graph with tree topology\nParameters:\nNumber of nodes: {0}\nChildren per node: {1}\n"
                .format(self.args.nodes, self.args.children))
            graph = PyntacleGenerator.Tree(
                [self.args.nodes, self.args.children],
                name="Tree",
                seed=self.args.seed)
        except (ValueError, TypeError, IllegalGraphSizeError):
            sys.stderr.write(
                u"Number of nodes and number of children must be positive integers. Quitting\n"
            )
            sys.exit(1)

    elif topology == "small-world":
        # This does not happen anymore, as default is 2.
        if not self.args.lattice_size:
            self.args.lattice_size = random.randint(2, 5)
        if not self.args.nei:
            self.args.nei = random.randint(1, 5)

        if isinstance(self.args.lattice, str):
            try:
                self.args.lattice = int(self.args.lattice)
                self.args.lattice_size = int(self.args.lattice_size)
                self.args.nei = int(self.args.nei)
                self.args.probability = float(self.args.probability)
                # "0 < p > 1.0" only rejected p > 1; negative values must
                # be rejected as well.
                if self.args.probability < 0.0 or self.args.probability > 1.0:
                    raise ValueError
                if self.args.lattice_size <= 1:
                    raise ValueError
                if self.args.nei < 1:
                    raise ValueError
                if self.args.lattice <= 1:
                    raise ValueError
            except ValueError:
                sys.stderr.write(
                    u"One of the parameters you specified is not the proper type or it is out of boundaries. Quitting\n"
                )
                sys.exit(1)
        try:
            sys.stdout.write(
                u"Generating Graph with small-world topology\nParameters:\nInitial lattice dimensions: {0}\nLattice size: {1}\nNei (number of edges that connect each graph): {2}\nRewiring probability: {3}\n"
                .format(self.args.lattice, self.args.lattice_size,
                        self.args.nei, self.args.probability))
            graph = PyntacleGenerator.SmallWorld([
                self.args.lattice, self.args.lattice_size, self.args.nei,
                self.args.probability
            ],
                                                 name="SmallWorld",
                                                 seed=self.args.seed)
        except (TypeError, ValueError, IllegalGraphSizeError):
            sys.stderr.write(
                u"The parameters you chose were invalid. Please check your command line. Quitting\n"
            )
            # Without this exit, execution continued with `graph` unbound.
            sys.exit(1)

    if graph.vcount() < 2 and graph.ecount() < 1:
        # First placeholder is the node count (it used to print the edge
        # count twice).
        sys.stdout.write(
            "Generated Graph is too small ({} nodes, {} edges). Rerun this command and tune your parameters. Quitting\n"
            .format(graph.vcount(), graph.ecount()))
        sys.exit(1)

    # ---- output file ------------------------------------------------------
    sys.stdout.write(section_end)
    sys.stdout.write(report_start)

    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: output directory does not exist {} will be created\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    if self.args.output_file is None:
        self.args.output_file = graph["name"][0]

    out_form = format_dictionary.get(self.args.output_format, "NA")

    if self.args.no_output_header:
        sys.stdout.write(
            u"Skipping header on output graph file, as requested\n")
        output_header = False
    else:
        output_header = True

    if out_form == "NA":
        sys.stderr.write(
            u"Output extension specified is not supported. Quitting\n")
        sys.exit(1)

    output_path = os.path.join(self.args.directory,
                               ".".join([self.args.output_file, out_form]))
    sys.stdout.write(u"Path to graph : {}\n".format(output_path))

    if self.args.output_separator is None:
        sys.stdout.write(
            u"Using '\\t' as default separator for output file\n")
        self.args.output_separator = "\t"

    # output generated networks
    if out_form == "adjm":
        sys.stdout.write(
            u"Writing generated graph to an adjacency matrix\n")
        PyntacleExporter.AdjacencyMatrix(graph,
                                         output_path,
                                         sep=self.args.output_separator,
                                         header=output_header)
    elif out_form == "egl":
        sys.stdout.write(u"Writing generated graph to an edge list\n")
        PyntacleExporter.EdgeList(graph,
                                  output_path,
                                  sep=self.args.output_separator,
                                  header=output_header)
    elif out_form == "sif":
        sys.stdout.write(
            u"Writing generated graph to a Simple Interaction Format (SIF) file\n"
        )
        PyntacleExporter.Sif(graph,
                             output_path,
                             sep=self.args.output_separator,
                             header=output_header)
    elif out_form == "dot":
        sys.stdout.write(u"Writing generated graph to a DOT file\n")
        # Ignore ugly RuntimeWarnings while creating a dot
        simplefilter("ignore", RuntimeWarning)
        PyntacleExporter.Dot(graph, output_path)
    elif out_form == "graph":
        sys.stdout.write(
            u"Writing generated graph to a binary file (ending in .graph)\n"
        )
        PyntacleExporter.Binary(graph, output_path)

    # ---- plot dimensions --------------------------------------------------
    # Check provided dimensions' format
    if self.args.plot_dim:
        # Custom size: comma-separated list of strictly positive integers.
        dimensions = []
        for token in self.args.plot_dim.split(","):
            try:
                value = int(token)
            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)
            if value <= 0:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)
            dimensions.append(value)
        self.args.plot_dim = dimensions
        plot_size = tuple(dimensions)
    else:
        # Pick a canvas according to the size of the graph.
        if graph.vcount() <= 150:
            plot_size = (800, 800)
        else:
            plot_size = (1600, 1600)

    # ---- plot -------------------------------------------------------------
    if not self.args.no_plot and graph.vcount() < 1000:
        sys.stdout.write(u"Drawing generated graph\n")

        # generates plot directory
        plot_dir = os.path.join(self.args.directory, "pyntacle-plots")
        if not os.path.isdir(plot_dir):
            os.mkdir(plot_dir)

        plot_path = os.path.join(
            plot_dir,
            ".".join([self.args.output_file, self.args.plot_format]))

        pal = sns.color_palette("Spectral", 10).as_hex()
        pal2 = sns.color_palette("RdYlGn", 10).as_hex()
        framepal = sns.color_palette("Spectral", 10, desat=0.5).as_hex()
        framepal2 = sns.color_palette("RdYlGn", 10, desat=0.5).as_hex()
        other_nodes_size = 18

        plot_graph = PlotGraph(graph=graph)

        # Per topology: accepted spellings of its preferred layout, the
        # layout name passed on for those spellings, node colour and frame
        # colour. Any other topology uses the "circle" entry.
        topology_style = {
            "random": (("random", ), "random", pal[-3], framepal[-3]),
            "scale-free": (("fr", "fruchterman_reingold"), "fr", pal[3],
                           framepal[3]),
            "tree": (("rt", "reingold_tilford"), "reingold_tilford",
                     pal2[-2], framepal2[-2]),
        }
        aliases, preferred_layout, other_nodes_colour, frame_vertex_colour = \
            topology_style.get(
                topology, (("circle", ), "circle", pal[0], framepal[0]))

        if self.args.plot_layout in aliases:
            plot_graph.set_layouts(layout=preferred_layout)
        else:
            plot_graph.set_layouts(self.args.plot_layout)

        node_colors = [other_nodes_colour] * graph.vcount()
        plot_graph.set_node_colors(colors=node_colors)
        # assign node labels to graph
        plot_graph.set_node_labels(labels=graph.vs()["name"])
        node_sizes = [other_nodes_size] * graph.vcount()
        plot_graph.set_node_sizes(sizes=node_sizes)
        frame_vertex_colour = [frame_vertex_colour] * graph.vcount()

        sys.stdout.write(
            u"Drawing graph in {} format at path: {}\n".format(
                self.args.plot_format, plot_path))
        plot_graph.plot_graph(path=plot_path,
                              bbox=plot_size,
                              margin=20,
                              edge_curved=0.2,
                              keep_aspect_ratio=True,
                              vertex_label_size=6,
                              vertex_frame_color=frame_vertex_colour)

    elif not self.args.no_plot and graph.vcount() >= 1000:
        self.logging.warning(
            u"Graph is above Pyntacle plotting capability ({} nodes, we plot graph with at best 1000 nodes). Graph plotting will be skipped."
            .format(graph.vcount()))

    if not self.args.suppress_cursor:
        cursor.stop()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle generate completed successfully\n")
    if self.args.repeat == 1:
        sys.exit(0)
def run(self): if not self.args.suppress_cursor: cursor = CursorAnimation() cursor.daemon = True cursor.start() if self.args.input_file is None: sys.stderr.write( u"Please specify an input file using the `-i/--input-file` option. Quitting\n" ) sys.exit(1) if not os.path.exists(self.args.input_file): sys.stderr.write(u"Cannot find {}. Is the path correct?".format( self.args.input_file)) sys.exit(1) input_header = True if self.args.no_header: input_header = False input_format = format_dictionary.get(self.args.format, "NA") sys.stdout.write(import_start) sys.stdout.write(u"Importing graph from file\n") graph = GraphLoad(self.args.input_file, file_format=input_format, header=input_header, separator=self.args.input_separator).graph_load() # init Utils global stuff utils = GraphUtils(graph=graph) if self.args.largest_component: try: graph = utils.get_largest_component() sys.stdout.write( u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n" .format(graph.vcount(), graph.ecount())) utils.set_graph(graph) except MultipleSolutionsError: sys.stderr.write( u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n" ) sys.exit(1) # define plot sizes if self.args.plot_dim: # define custom format self.args.plot_dim = self.args.plot_dim.split(",") for i in range(0, len(self.args.plot_dim)): try: self.args.plot_dim[i] = int(self.args.plot_dim[i]) except ValueError: sys.stderr.write( u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n" ) sys.exit(1) if self.args.plot_dim[i] <= 0: sys.stderr.write( u"Format specified must be a comma-separated list of values(e.g. 1920,1080). 
Quitting\n" ) sys.exit(1) plot_size = tuple(self.args.plot_dim) else: # generate different formats according to graph size if graph.vcount() <= 150: plot_size = (800, 800) else: plot_size = (1600, 1600) # initialize module finder method communities = CommunityFinder(graph=graph) # initialize Reporter results = OrderedDict() if self.args.which == "fastgreedy": if self.args.weights is not None: # import edge attributes if not os.path.exists(self.args.weights): sys.stderr.write( u"Attribute file {} does not exist. Quitting\n".format( self.args.weights)) sys.exit(1) else: ImportAttributes.import_edge_attributes( graph, self.args.weights, sep=separator_detect(self.args.weights), mode=self.args.weights_format) weights = [ float(x) if x is not None else 1.0 for x in graph.es()["weights"] ] else: weights = None if self.args.clusters is not None: try: self.args.clusters = int(self.args.clusters) except: sys.stderr.write( u"argument of '--clusters' must be an integer. Quitting\n" ) sys.exit(1) sys.stdout.write(section_end) sys.stdout.write(run_start) sys.stdout.write( u"Finding communities using the fastgreedy algorithm\n") communities.fastgreedy(weights=weights, n=self.args.clusters) mods = communities.get_modules algorithm = "fastgreedy" elif self.args.which == "infomap": sys.stdout.write(section_end) sys.stdout.write(run_start) sys.stdout.write( u"Finding communities using the infomap (naive) algorithm\n") communities.infomap() mods = communities.get_modules algorithm = "infomap" elif self.args.which == "leading-eigenvector": sys.stdout.write(section_end) sys.stdout.write(run_start) sys.stdout.write( u"Finding communities using the leading-eigenvector algorithm\n" ) communities.leading_eigenvector() mods = communities.get_modules algorithm = "leading-eigenvector" elif self.args.which == "community-walktrap": try: self.args.steps = int(self.args.steps) except: sys.stderr.write( u"Argument of '--steps' must be an integer. 
Quitting\n") sys.exit(1) if self.args.weights is not None: # import edge attributes if not os.path.exists(self.args.weights): sys.stderr.write( u"Weights file {} does not exist. Quitting\n".format( self.args.weights)) sys.exit(1) else: ImportAttributes.import_edge_attributes( graph, self.args.weights, sep=separator_detect(self.args.weights), mode=self.args.weights_format) weights = [ float(x) if x != None else 1.0 for x in graph.es()["weights"] ] else: weights = None if self.args.clusters is not None: try: self.args.clusters = int(self.args.clusters) except: sys.stderr.write( u"Argument of '--clusters' must be an integer. Quitting\n" ) sys.exit(1) sys.stdout.write(section_end) sys.stdout.write(run_start) sys.stdout.write( u"Finding communities using the walktrap algorithm and a walker of {} steps\n" .format(self.args.steps)) communities.community_walktrap(weights=weights, n=self.args.clusters, steps=self.args.steps) mods = communities.get_modules algorithm = "community-walktrap" mods_report = [] if not mods: sys.stderr.write(u"No communities found. Quitting.") sys.exit(1) for i, elem in enumerate(mods): mods_report.append("\t".join([ str(x) for x in [i, elem.vcount(), elem.ecount(), len(elem.components())] ]) + "\n") sys.stdout.write(section_end) sys.stdout.write(summary_start) sys.stdout.write( u"Pyntacle - Community finding report:\nAlgorithm:{0}\nTotal number of communities found:" "\t{1}\nIndex\tNodes\tEdges \tComponents\n{2}".format( algorithm, len(mods), "".join(mods_report))) # initialize Moduleutils class mod_utils = ModuleUtils(modules=mods) if not all(x is None for x in [ self.args.min_nodes, self.args.max_nodes, self.args.min_components, self.args.max_components ]): init_mods = len(mods) if self.args.min_nodes is not None: try: self.args.min_nodes = int(self.args.min_nodes) except: sys.stderr.write( u"Argument of '--min-nodes' must be an integer. 
Quitting\n" ) sys.exit(1) if self.args.max_nodes is not None: try: self.args.max_nodes = int(self.args.max_nodes) except: sys.stderr.write( u"Argument of '--max-nodes' must be an integer. Quitting\n" ) sys.exit(1) if self.args.max_components is not None: try: self.args.max_components = int(self.args.max_components) except: sys.stderr.write( u"Argument of '--max-components' must be an integer. Quitting\n" ) sys.exit(1) if self.args.min_components is not None: try: self.args.min_components = int(self.args.min_components) except: sys.stderr.write( u"Argument of '--min-components' must be an integer. Quitting\n" ) sys.exit(1) mod_utils.filter_subgraphs(min_nodes=self.args.min_nodes, max_nodes=self.args.max_nodes, min_components=self.args.min_components, max_components=self.args.max_components) if len(mod_utils.modules) > 0: sys.stdout.write( u"Filtered out {0} communities. Keeping {1} communities\n". format((init_mods - len(mod_utils.modules)), len(mod_utils.modules))) else: sys.stdout.write( u"No community could be kept using the current filters. Quitting\n" ) sys.exit(0) else: sys.stdout.write( u"No filters specified. 
All modules will be kept\n") sys.stdout.write(section_end) mod_utils.label_modules_in_graph(graph=graph) final_mods = mod_utils.get_modules() for elem in final_mods: results[elem["module"]] = [ elem.vcount(), elem.ecount(), len(elem.components()) ] sys.stdout.write(report_start) # producing output graph if self.args.no_output_header: sys.stdout.write( u"Skipping header writing on output graph community files\n") output_header = False else: output_header = True if not os.path.isdir(self.args.directory): sys.stdout.write( u"WARNING: output directory does not exists {} will be created\n" .format(os.path.abspath(self.args.directory))) os.makedirs(os.path.abspath(self.args.directory), exist_ok=True) # dictionary that stores the extension of the output file if self.args.output_format is None: self.args.output_format = self.args.format out_form = format_dictionary.get(self.args.output_format, "NA") if self.args.output_file is None: # insert random name generator self.args.output_file = "_".join( ["pyntacle", graph["name"][0], algorithm]) sys.stdout.write( u"Basename of the output modules will be {} (default)\n". 
format(self.args.output_file)) output_basename = os.path.join(self.args.directory, self.args.output_file) # output generated networks sys.stdout.write( "Writing resulting communities to the specified network file format\n" ) for elem in final_mods: output_path = ".".join([ "_".join([output_basename, str(elem["module"]), self.date]), out_form ]) try: if out_form == "adjm": PyntacleExporter.AdjacencyMatrix( elem, output_path, sep=self.args.output_separator, header=output_header) elif out_form == "egl": PyntacleExporter.EdgeList(elem, output_path, sep=self.args.output_separator, header=output_header) elif out_form == "sif": PyntacleExporter.Sif(elem, output_path, sep=self.args.output_separator, header=output_header) elif out_form == "dot": # Ignore ugly RuntimeWarnings while creating a dot simplefilter("ignore", RuntimeWarning) PyntacleExporter.Dot(elem, output_path) elif out_form == "bin": PyntacleExporter.Binary(elem, output_path) except UnsupportedGraphError: sys.stdout.write( "Module {0} was skipped because it is too small ({1} nodes, {2} edges), use the `--save-binary` flag to retrieve it\n" .format(elem["module"], elem.vcount(), elem.ecount())) # reporting and plotting part sys.stdout.write(u"Producing report in {} format\n".format( self.args.report_format)) r = PyntacleReporter(graph=graph) report_type = ReportEnum.Communities results["algorithm"] = algorithm r.create_report(report_type=report_type, report=results) r.write_report(report_dir=self.args.directory, format=self.args.report_format) # save the original graph into a binary file if self.args.save_binary: binary_name = ".".join([ "_".join([ os.path.splitext(os.path.basename( self.args.input_file))[0], "communities" ]), "graph" ]) binary_path = os.path.join(self.args.directory, binary_name) sys.stdout.write( u"Storing the input graph with module labels into a binary file in the results directory\n" .format(binary_path)) if not self.args.no_plot: plot_dir = os.path.join(self.args.directory, 
"pyntacle-plots") if os.path.isdir(plot_dir): self.logging.info( u"A directory named \"pyntacle-plots\" already exists.") else: os.mkdir(plot_dir) avail_colors_fill = sns.color_palette( "Spectral", n_colors=len( final_mods)).as_hex() # available colors for node fill avail_colors_borders = sns.color_palette("Spectral", n_colors=len(final_mods), desat=0.5).as_hex() if graph.vcount() < 1000: sys.stdout.write(u"Plotting graph in {} format\n".format( self.args.plot_format)) main_plot_path = os.path.join( plot_dir, ".".join([ "_".join([ self.args.which, os.path.splitext( os.path.basename(self.args.input_file))[0], "communities", self.date ]), self.args.plot_format ])) # initialize general graph Drawer sys.stdout.write( u"Drawing original graph, highlighting communities\n") if len(final_mods) > 20: sys.stdout.write( u"WARNING:The number of modules found ({}) is very high. The plot of the input graph will have nuanced colors\n" .format(len(final_mods))) graph_plotter = PlotGraph(graph=graph) graph_plotter.set_node_labels(labels=graph.vs()["name"]) graph_plotter.set_node_sizes([30] * graph.vcount()) # define different colors for each module not_in_module_colors = "#A9A9A9" col_list = [] bord_list = [] for elem in graph.vs(): module = elem["module"] if module is not None: col_list.append(avail_colors_fill[module]) bord_list.append(avail_colors_borders[module]) else: col_list.append(not_in_module_colors) bord_list.append(not_in_module_colors) graph_plotter.set_node_colors(col_list) graph_plotter.set_layouts(self.args.plot_layout) graph_plotter.plot_graph(path=main_plot_path, bbox=plot_size, margin=20, edge_curved=0.2, keep_aspect_ratio=True, vertex_label_size=6, vertex_frame_color=bord_list) else: sys.stdout.write( u"Input graph is above Pyntacle plotting limit ({} nodes found, only graphs with at best 1000 nodes). 
Input graph will not be plotted\n" .format(graph.vcount())) sys.stdout.write("Drawing each module separately\n") for i, comm in enumerate(final_mods): if comm.vcount() <= 1000: plotter = PlotGraph(graph=comm) plotter.set_node_labels(labels=comm.vs()["name"]) plotter.set_node_colors([avail_colors_fill[i]] * comm.vcount()) plotter.set_node_sizes([30] * comm.vcount()) comm_plot_path = os.path.join( plot_dir, ".".join([ "_".join([ self.args.output_file, str(comm["module"]), self.date ]), self.args.plot_format ])) plotter.set_layouts(self.args.plot_layout) plotter.plot_graph( path=comm_plot_path, bbox=plot_size, margin=20, edge_curved=0.2, keep_aspect_ratio=True, vertex_label_size=6, vertex_frame_color=[avail_colors_borders[i]] * comm.vcount()) else: sys.stdout.write( u"Module {0} is above Pyntacle plotting limit ({1} nodes found, communities with at best 1000 nodes are plotted). Plotting of this module will be skipped\n" .format(i, comm.vcount())) if not self.args.suppress_cursor: cursor.stop() sys.stdout.write(section_end) sys.stdout.write(u"Pyntacle communities completed successfully\n") sys.exit(0)
def run(self):
    """Convert the input network file into the requested output format.

    Reads its configuration from ``self.args`` (``input_file``, ``format``,
    ``input_separator``, ``no_header``, ``output_file``, ``output_format``,
    ``output_separator``, ``no_output_header``, ``directory`` and
    ``suppress_cursor``), writes the converted network into
    ``self.args.directory`` and terminates the process through
    ``sys.exit``.

    Exit status is 0 on success and 1 when the input file is missing or
    not specified, when the output format is not supported, or when the
    input and output formats are the same.
    """
    # Show the cursor animation unless the user asked to suppress it.
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    header = not self.args.no_header
    output_header = not self.args.no_output_header

    if self.args.input_file is None:
        sys.stderr.write(
            u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
        )
        sys.exit(1)

    if not os.path.exists(self.args.input_file):
        sys.stderr.write(u"Cannot find {}. Is the path correct?".format(
            self.args.input_file))
        sys.exit(1)

    if self.args.input_separator is None:
        separator = separator_detect(self.args.input_file)
    else:
        separator = self.args.input_separator

    input_basename = os.path.basename(self.args.input_file)

    # Resolve the default output name BEFORE it is used in any message:
    # os.path.basename(None) raises TypeError, which used to crash every
    # run in which the output file name was omitted.
    default_output_name = self.args.output_file is None
    if default_output_name:
        self.args.output_file = os.path.splitext(input_basename)[0]

    sys.stdout.write(run_start)
    sys.stdout.write(
        u"Converting input file {0} to requested output file: {1}\n".format(
            input_basename, os.path.basename(self.args.output_file)))

    in_format = format_dictionary.get(self.args.format, "NA")
    out_form = format_dictionary.get(self.args.output_format, "NA")

    if default_output_name:
        sys.stdout.write(
            u"Output file name will be the basename of the input file ({})\n"
            .format(self.args.output_file))

    if self.args.output_separator is None:
        sys.stdout.write(
            u"Using the field separator used in the input network file in the converted output file, if the desired output format requires field separator\n"
        )
        self.args.output_separator = separator

    # Reject an unsupported output format before touching the filesystem,
    # so a bad request does not leave an empty output directory behind.
    if out_form == "NA":
        sys.stderr.write(
            u"Output extension specified is not supported, see '--help' for more info. Quitting\n"
        )
        sys.exit(1)

    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: output directory does not exist, will create one at {}\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    output_path = os.path.join(self.args.directory,
                               ".".join([self.args.output_file, out_form]))

    # NOTE(review): the raw ``input_separator`` argument (possibly None) is
    # passed here rather than the detected ``separator``; presumably
    # GraphLoad handles a missing separator itself -- confirm.
    init_graph = GraphLoad(input_file=self.args.input_file,
                           file_format=in_format,
                           header=header,
                           separator=self.args.input_separator)

    # Special cases: edge list <-> SIF are converted file-to-file, without
    # loading the graph.
    if in_format == "egl" and out_form == "sif":
        sys.stdout.write(
            u"Converting edge list to Simple Interaction Format (SIF)\nFull path to the output file:\n{}\n"
            .format(output_path))
        PyntacleConverter.edgelistToSif(
            file=self.args.input_file,
            sep=separator,
            output_sep=self.args.output_separator,
            header=output_header,
            output_file=output_path)

    elif in_format == "sif" and out_form == "egl":
        sys.stdout.write(
            u"Converting Simple Interaction Format (SIF) to edge list\nFull path to the output file:\n{}\n"
            .format(output_path))
        PyntacleConverter.sifToEdgelist(
            file=self.args.input_file,
            sep=separator,
            output_sep=self.args.output_separator,
            header=output_header,
            output_file=output_path)

    else:
        graph = init_graph.graph_load()
        in_form = init_graph.get_format()

        if in_form == out_form:
            sys.stderr.write(
                u"The output format specified is the same as the input format. Quitting\n"
            )
            sys.exit(1)

        if out_form == "adjm":
            sys.stdout.write(
                u"Converting input graph file {0} to adjacency matrix at full path:\n{1}\n"
                .format(input_basename, output_path))
            PyntacleExporter.AdjacencyMatrix(
                graph,
                output_path,
                sep=self.args.output_separator,
                header=output_header)

        elif out_form == "egl":
            sys.stdout.write(
                u"Converting input graph file {0} to edge list at full path:\n{1}\n"
                .format(input_basename, output_path))
            PyntacleExporter.EdgeList(graph,
                                      output_path,
                                      sep=self.args.output_separator,
                                      header=output_header)

        elif out_form == "sif":
            sys.stdout.write(
                u"Converting input graph file {0} to Simple Interaction Format (SIF) file at full path:\n{1}\n"
                .format(input_basename, output_path))
            PyntacleExporter.Sif(graph,
                                 output_path,
                                 sep=self.args.output_separator,
                                 header=output_header)

        elif out_form == "dot":
            # Ignore ugly RuntimeWarnings while converting to dot
            simplefilter("ignore", RuntimeWarning)
            sys.stdout.write(
                u"Converting input graph file {0} to DOT file using igraph utilities at full path:\n{1}\n(output separator will be ignored)\n"
                .format(input_basename, output_path))
            PyntacleExporter.Dot(graph, output_path)

        elif out_form == "graph":
            sys.stdout.write(
                u"Converting input graph file {0} to a binary file (ending in .graph) at full path:\n{1}\n(output separator will be ignored)\n"
                .format(input_basename, output_path))
            PyntacleExporter.Binary(graph, output_path)

    if not self.args.suppress_cursor:
        cursor.stop()

    sys.stdout.write(section_end)
    # The message has no placeholders, so the former ``.format(...)`` call
    # was a no-op and has been dropped.
    sys.stdout.write(u"Pyntacle convert completed successfully. Ending\n")
    sys.exit(0)