def read_input_file(self):
    """Read every input graph and return the list of their edge sets.

    The first line holds the number of graphs.  Each graph is read with
    ``read_input_graph`` and all of its edges must lie in one component.
    Once the last graph has been read, only EOF may follow.

    Returns:
        list: one edge set per graph, in file order.

    Raises:
        Whatever ``self.exception`` / ``self.exception_with_expected`` build,
        for a disconnected graph or for trailing lines respectively.
    """
    num_cases = self.read_numbers(
        'Cannot parse the number of input graphs.', 1)[0]

    edge_sets = []
    for case_num in range(1, num_cases + 1):
        self.case_num = case_num
        edges = self.read_input_graph()
        parsed = graph.make_graph(edges)
        parsed.search()
        if parsed.edges_in_one_component():
            edge_sets.append(edges)
            continue
        raise self.exception(
            'Disconnected graph: after reading '
            'the last edge of this graph, the edges are not in '
            'the same component.')

    # Step past the last graph so that errors raised from here on are
    # reported against the position after it.
    self.case_num += 1
    self.readline()
    if len(self.line) > 0:
        message = ('Extra lines after Graph '
                   '#{0} (line 1 says the number of graphs is {0}).')
        raise self.exception_with_expected(
            message.format(num_cases), 'Expecting EOF.')
    return edge_sets
def prediction(line_token: str, save_dir: str) -> None:
    '''
    Create the graph of *predicted* daily infection counts and post it.

    The prediction data is fetched from the web API and compared with the
    copy saved by the previous run; the graph is only redrawn and posted
    when the data has changed.

    Args:
        line_token (str): LINE token used for the POST
        save_dir (str): path of the directory that holds temporary data
    '''
    try:
        body = get_requests(
            'https://covid19-japan-web-api.now.sh/api/v1/total?predict=true')
    except Exception as error:
        print(f'error:{error.args}')
        return

    graph_image_path = os.path.join(save_dir, 'graph_prodiction.png')
    history_path = os.path.join(save_dir, 'history_prodiction.json')

    # No history file yet means there is nothing to compare against.
    old_body = json_read(history_path) if os.path.isfile(history_path) else []
    if body == old_body:
        return

    make_graph(
        body, graph_image_path,
        'COVID - 19 days to measure the number of infected persons')
    post_line(line_token, '日別感染者数の予測グラフ', graph_image_path)
    json_write(body, history_path)
def test():
    """Exercise ``topsort`` / ``topsort_dfs`` on a few small graphs.

    The first three graphs are printed in sorted order for visual
    inspection; the last one is a 3-cycle, for which the test expects
    ``topsort`` to return ``None``.
    """
    graph = graph_utils.make_graph([('a', 'b'), ('a', 'd'), ('b', 'c'),
                                    ('c', 'd'), ('d', 'e'), ('c', 'e'),
                                    ('f', 'f')])
    print([node.key for node in topsort(graph)])

    graph = graph_utils.make_graph([(7, 11), (5, 11), (7, 8), (3, 8),
                                    (3, 10), (11, 2), (11, 9), (11, 10),
                                    (8, 9)])
    print([node.key for node in topsort(graph)])

    graph = graph_utils.make_graph([('g', 'h'), ('a', 'h'), ('a', 'b'),
                                    ('b', 'c'), ('c', 'f'), ('d', 'c'),
                                    ('d', 'e'), ('e', 'f'), ('i', 'i')])
    print([node.key for node in topsort_dfs(graph)])

    graph = graph_utils.make_graph([(1, 2), (2, 3), (3, 1)])
    # Identity comparison: ``== None`` could be fooled by a custom __eq__.
    assert topsort(graph) is None
def test():
    """Run ``tarjans`` on two sample graphs and print the components found."""
    cases = (
        ('first test: ',
         [(1, 2), (2, 3), (3, 4), (3, 5), (4, 6), (5, 7), (6, 3), (7, 6),
          (7, 3)]),
        ('second test: ',
         [(1, 2), (2, 4), (4, 3), (3, 1), (5, 3), (5, 4), (5, 6), (6, 7),
          (7, 5), (9, 8), (8, 10), (10, 9), (10, 11), (11, 10), (12, 13),
          (11, 13)]),
    )
    for label, edge_list in cases:
        graph = graph_utils.make_graph(edge_list)
        print(label)
        print_components(tarjans(graph))
def readXYZ(xyz, bonds=None, cluster_bond=None, fixed_atoms=None):
    """Read an xyz file and build a molecule object plus a reactant graph.

    Args:
        xyz: path of the xyz file; read with pybel and again with
            ``xyz_file_to_atoms``.
        bonds: optional explicit bond list.  Each entry is indexed as
            ``(begin, end, order)``; the ``- 1`` applied below suggests the
            atom indices are 1-based -- TODO confirm.
        cluster_bond: optional extra bonds appended to the bonds perceived by
            OpenBabel (same entry layout as ``bonds``).
        fixed_atoms: not used by the active code (only referenced in a
            commented-out line).

    Returns:
        tuple: ``(mol_obj, reactant_graph)``.
    """
    # extract molecule information from xyz
    mol = next(pb.readfile('xyz', xyz))
    # NOTE(review): reactant_atom is only referenced by the commented-out
    # imaginary_bond call below.
    reactant_atom = [a.OBAtom.GetAtomicNum() for a in mol]
    # Manually give bond information
    # (Because in metal system the bond information detect by openbabel usually have some problem)
    if bonds or cluster_bond:
        # Rebuild the molecule atom by atom so the bonds can be set by hand.
        m = Molecule(pb.ob.OBMol())
        obmol = m.OBMol
        obmol.BeginModify()
        for atom in mol:
            coords = [coord for coord in atom.coords]
            atomno = atom.atomicnum
            obatom = ob.OBAtom()
            obatom.thisown = 0
            obatom.SetAtomicNum(atomno)
            obatom.SetVector(*coords)
            obmol.AddAtom(obatom)
            del obatom
        if cluster_bond:
            # Start from the bonds OpenBabel perceived, then append the
            # caller-supplied cluster bonds.  This replaces any ``bonds``
            # argument that was passed in.
            bonds = [(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), bond.GetBondOrder()) for bond in pb.ob.OBMolBondIter(mol.OBMol)]
            #bonds = imaginary_bond(bonds, reactant_atom, fixed_atoms)
            bonds.extend(cluster_bond)
        for bond in bonds:
            obmol.AddBond(bond[0], bond[1], bond[2])
        # obmol.ConnectTheDots()
        obmol.PerceiveBondOrders()
        # obmol.SetTotalSpinMultiplicity(1)
        obmol.SetTotalCharge(int(mol.charge))
        obmol.Center()
        obmol.EndModify()
        mol_obj = gen3D.Molecule(obmol)
        reactant_graph = Species(xyz_file_to_atoms(xyz))
        # Shift atom indices down by one for the graph's bond list.
        reactant_bonds = [(i[0] - 1, i[1] - 1) for i in bonds]
        make_graph(reactant_graph, bond_list=reactant_bonds)
    else:
        # No manual bonds: rely on the bonds OpenBabel perceived.
        mol_obj = gen3D.Molecule(mol.OBMol)
        reactant_graph = Species(xyz_file_to_atoms(xyz))
        reactant_bonds = tuple(
            sorted([(bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1) for bond in pb.ob.OBMolBondIter(mol.OBMol)]))
        make_graph(reactant_graph, bond_list=reactant_bonds)
    return mol_obj, reactant_graph
def test():
    """Smoke-test the topological sort routines.

    Three acyclic-looking sample graphs are sorted and printed (two with
    ``topsort``, one with ``topsort_dfs``); a final 3-cycle is expected to
    make ``topsort`` return ``None``.
    """
    samples = (
        (topsort, [('a', 'b'), ('a', 'd'), ('b', 'c'), ('c', 'd'),
                   ('d', 'e'), ('c', 'e'), ('f', 'f')]),
        (topsort, [(7, 11), (5, 11), (7, 8), (3, 8), (3, 10), (11, 2),
                   (11, 9), (11, 10), (8, 9)]),
        (topsort_dfs, [('g', 'h'), ('a', 'h'), ('a', 'b'), ('b', 'c'),
                       ('c', 'f'), ('d', 'c'), ('d', 'e'), ('e', 'f'),
                       ('i', 'i')]),
    )
    for sort, edge_list in samples:
        graph = graph_utils.make_graph(edge_list)
        print([node.key for node in sort(graph)])

    graph = graph_utils.make_graph([(1, 2), (2, 3), (3, 1)])
    # ``is None`` rather than ``== None``: an identity test cannot be
    # affected by a custom __eq__ on the returned object.
    assert topsort(graph) is None
def read_input_file(self):
    """Parse the whole input file into a list of edge sets.

    Line 1 gives the graph count; that many graphs are then read, each of
    which must keep all its edges in a single component.  Anything left
    after the final graph is reported as an error.

    Returns:
        list: the edge set of every graph, in the order they were read.
    """
    counts = self.read_numbers('Cannot parse the number of input graphs.', 1)
    num_cases = counts[0]
    disconnected = ('Disconnected graph: after reading '
                    'the last edge of this graph, the edges are not in '
                    'the same component.')

    edge_sets = []
    for index in range(num_cases):
        self.case_num = index + 1
        current = self.read_input_graph()
        built = graph.make_graph(current)
        built.search()
        if not built.edges_in_one_component():
            raise self.exception(disconnected)
        edge_sets.append(current)

    # Move the case counter past the last graph before checking for EOF.
    self.case_num += 1
    self.readline()
    if len(self.line) > 0:
        extra = ('Extra lines after Graph '
                 '#{0} (line 1 says the number of graphs is {0}).'
                 ).format(num_cases)
        raise self.exception_with_expected(extra, 'Expecting EOF.')
    return edge_sets
def find_mlst(self, edge_set, branch_threshold=3):
    """
    Returns an output_edge_set that represents the MLST
    -----------------------------------------------------------
    Steps:
      1. build a leafy forest from the graph of edge_set,
      2. connect the trees of that forest,
      3. feed the edges that were left unused to the incremental update.
    -----------------------------------------------------------
    """
    source_graph = graph.make_graph(edge_set)
    source_graph.search()

    # Step 1: forest plus the bookkeeping needed to join it up.
    forest, vertex_sets, degrees = self.find_leafy_forest(
        source_graph, branch_threshold)

    # Step 2: join the forest into one edge set.
    connected = self.connect_forest(
        source_graph, forest, vertex_sets, degrees)

    # Step 3: offer every edge not used so far to the incremental update.
    unused = edge_set.difference(connected)
    return self.incrementally_update(connected, unused)
def experiment(edge_set, mlst_handler, experiment_name="Experiment:", experiment_desc=None, display=False): """ Runs the experiment with a given handler and edge set """ # run the experiment print ">>> %s" %(str(experiment_name)); stime = time.time() print "--------------------------------------------------------------" output_edge_set = mlst_handler.find_mlst(edge_set) assert util.is_mlst(output_edge_set) != False print "--------------------------------------------------------------" etime = time.time(); duration = int((etime - stime) * 1000) output_graph = graph.make_graph(output_edge_set) output_graph.search() stats = output_graph.stats() print ">>> Average Degree: %s" %(str(stats["average_degree"])) print ">>> Number of Leaves: %s" %(str(output_graph.num_leaves)) print "<<< Time Elapsed: %d ms" %(duration) print "\n" # display input/output graph if display: util.display(edge_set) if display: util.display(output_edge_set) stats = {} stats["duration"] = duration stats["output_edge_set"] = output_edge_set return stats
def getMessage():
    """Refresh the lab-count figures and return the diff text.

    Fetches the lab page, renders the plain and the sorted graph, compares
    the new counts with those stored in ``num_lab.json``, then overwrites
    that file with the new counts.

    Returns:
        The value produced by ``get_str_numjson_diff`` for (old, new).
    """
    current_counts = get_num_lab_dict(get_html_from_labpage())
    ranked = sorted(current_counts.items(),
                    key=lambda item: item[1],
                    reverse=True)

    # The plotting helpers get copies, so the originals stay untouched.
    make_graph(copy.copy(current_counts), "./image/figure.png")
    make_sorted_graph(copy.copy(ranked), "./image/sorted.png")

    with open('num_lab.json') as stored:
        previous_counts = json.load(stored)

    diff_text = get_str_numjson_diff(previous_counts, current_counts)
    dict2jsonfile(current_counts, 'num_lab.json')
    return diff_text
def read_output_graph(self, in_edge_set):
    """Read one output graph and check it is a spanning tree of the input.

    Checks performed: the edge count equals (non-isolated input nodes - 1),
    every edge exists in the input, no edge is duplicated, the node counts
    match, the graph is connected and it has no cycle.

    Args:
        in_edge_set: edge set of the corresponding input graph.

    Returns:
        ``num_leaves`` of the validated output graph.

    Raises:
        The error built by ``self.exception`` when any check fails.
    """
    nums = self.read_numbers('Cannot parse the number of edges.', 1)
    Gin = graph.make_graph(in_edge_set)
    Gin.search()
    if nums[0] != Gin.num_nodes - 1:
        raise self.exception(
            ('Input graph has {0} non-isolated '
             'nodes, output graph should have {1} edges, '
             'got {2} instead.').format(Gin.num_nodes, Gin.num_nodes - 1,
                                        nums[0]))

    out_edge_set = set()
    num_edges = nums[0]
    for _ in range(num_edges):
        nums = self.read_numbers('Cannot parse the next edge.', 2)
        e = graph.Edge(nums[0], nums[1])
        if e not in in_edge_set:
            raise self.exception(
                ('Edge {0} in the output graph is '
                 'absent in the input graph.').format(e))
        if e in out_edge_set:
            raise self.exception(
                ('Edge {0} (or its reverse) is '
                 'duplicated.').format(e))
        out_edge_set.add(e)

    Gout = graph.make_graph(out_edge_set)
    Gout.search()
    if Gout.num_nodes != Gin.num_nodes:
        raise self.exception(
            ('After reading the last edge, the number '
             'of non-isolated nodes in the output graph ({0}) '
             'should equal that of the input graph ({1}) to be '
             'a spanning tree.').format(Gout.num_nodes, Gin.num_nodes))
    # edges_in_one_component is a method and must be called: the bare
    # attribute is always truthy, which silently disabled this check.
    if not Gout.edges_in_one_component():
        raise self.exception(
            'Disconnected graph: after reading the last edge, the output graph should be connected to be a spanning tree.'
        )
    if Gout.has_cycle:
        raise self.exception(
            'Cycle detected: after reading the last edge, the output graph should not have cycles to be a spanning tree.'
        )
    return Gout.num_leaves
def is_mlst(edge_set):
    """Return the number of leaves if edge_set forms a tree, else False.

    The edge set qualifies when its edges are in one component, it has
    exactly (number of nodes - 1) edges and no cycle was found.
    """
    import graph as graph_module

    candidate = graph_module.make_graph(edge_set)
    candidate.search()

    if not candidate.edges_in_one_component():
        return False
    if len(candidate.get_edge_set()) != (candidate.num_nodes - 1):
        return False
    if candidate.has_cycle:
        return False
    return candidate.num_leaves
def test():
    """Run ``dijkstra`` from vertex 1 on a small weighted graph and print
    every vertex afterwards."""
    raw_costs = {
        (1, 2): 7,
        (1, 3): 9,
        (1, 6): 14,
        (2, 4): 15,
        (2, 3): 10,
        (6, 3): 2,
        (6, 5): 9,
        (3, 4): 11,
        (5, 4): 6,
    }
    weights = graph_utils.weights(raw_costs)
    # The graph is built from the keys of the converted weight table.
    g = graph_utils.make_graph(weights.keys())
    dijkstra(g, weights, g.vertices[1])
    for settled in g.vertices.values():
        print(settled)
def take_samples(sensor):
    """Record samples to a timestamped .bin file, then graph and mail them.

    Python 2 code (print statements; a trailing comma suppresses the
    newline).  Samples are taken for as long as ``Globals.State`` is truthy.
    An empty recording is deleted; otherwise a PNG is produced with
    ``make_graph`` and both files are mailed to ``NOTIFY_EMAIL``.

    Args:
        sensor: passed through unchanged to ``take_sample``.
    """
    sample_count = 0
    filename = 'data-%s.bin' % datetime.now().strftime("%Y%m%d-%H%M%S")
    filepath = path.join(DATA_DIRECTORY, filename)
    print "Trying to collect samples in %s:" % filename
    with open(filepath, 'wb') as f:
        while Globals.State:
            take_sample(f, sensor)
            sample_count += 1
            # Progress dot every 100 samples.
            if sample_count % 100 == 0:
                stdout.write(".")
                stdout.flush()
            time.sleep(0.01)
    print
    # If the file is empty, delete it
    # presumably each sample occupies 4 bytes -- TODO confirm against take_sample
    samples = stat(filepath).st_size / 4
    print
    if samples == 0:
        print "No data collected"
        remove(filepath)
    else:
        print "Wrote out %d samples to %s" % (samples, filename)
        # Same path with the 'bin' suffix swapped for 'png'.
        pngpath = filepath[:-3] + 'png'
        print "Making %s" % pngpath,
        make_graph(filepath, pngpath)
        print "Done"
        print "Sending mail to %s.." % NOTIFY_EMAIL,
        mail(
            NOTIFY_EMAIL,
            "Report - %s" % filename,
            "Got %d samples for file %s" % (samples, filename),
            attach=filepath,
            image=pngpath
        )
        if STATUS_LED is not None:
            print "Blinking status LED",
            # Three on/off pulses of 0.2 s each.
            for i in range(3):
                GPIO.output(STATUS_LED, GPIO.HIGH)
                time.sleep(0.2)
                print ".",
                GPIO.output(STATUS_LED, GPIO.LOW)
                time.sleep(0.2)
            print "."
        print "Done"
def test_centrality(centrality_f, reverse=False):
    """Compare a centrality-based prediction for a good and a bad node.

    Python 2 code (print statements).  Relies on names defined outside this
    function: ``pnode_good``, ``version_good``, ``pnode_bad``,
    ``version_bad``, ``good`` and ``bad``.

    Args:
        centrality_f: callable applied to a graph; its result is indexed by
            node below.
        reverse: when true, ``centrality_f`` is applied to a reversed copy
            of each graph.
    """
    node_good = (pnode_good, version_good)
    node_bad = (pnode_bad, version_bad)
    dg_good, nodes_good = graph.make_graph(good)
    name = histogram.get_name(dg_good, node_good)
    if reverse:
        rank_good = centrality_f(dg_good.reverse(copy=True))
    else:
        rank_good = centrality_f(dg_good)
    # The KDEs are built from the good graph only and reused for the bad node.
    counts_good = histogram.aggregate(dg_good, rank_good)
    kdes_good = histogram.counts_to_kdes(counts_good)
    # REMOVE
    print counts_good[name]
    print "mean", np.mean(counts_good[name])
    print
    print
    print ">>>>>GOOD ON GOOD"
    r_good = rank_good[node_good]
    pre_good = histogram.kde_predict(kdes_good[name], rank_good[node_good])
    print node_good, name, r_good, pre_good
    dg_bad, nodes_bad = graph.make_graph(bad)
    name = histogram.get_name(dg_bad, node_bad)
    if histogram.SHOULD_HARDCODE_GCC:
        name = "/usr/bin/gcc"
    print ">>>>>BAD ON GOOD"
    if reverse:
        rank_bad = centrality_f(dg_bad.reverse(copy=True))
    else:
        rank_bad = centrality_f(dg_bad)
    r_bad = rank_bad[node_bad]
    # Bad node's rank scored against the KDE built from the good graph.
    pre_bad = histogram.kde_predict(kdes_good[name], rank_bad[node_bad])
    print node_bad, name, r_bad, pre_bad
    print ">>>>>COMPARISON"
    print "diff (good - bad)", name, "rank diff", r_good - r_bad, "prediction diff", pre_good - pre_bad
def test():
    """Build a sample graph (each edge listed in both directions) and print
    the keys of the clique returned by ``max_clique``."""
    edge_pairs = [
        ('b', 'a'), ('c', 'b'), ('d', 'c'), ('e', 'd'), ('a', 'e'),
        ('g', 'a'), ('g', 'b'), ('h', 'b'), ('h', 'f'), ('h', 'g'),
        ('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'a'),
        ('a', 'g'), ('b', 'g'), ('b', 'h'), ('f', 'h'), ('g', 'h'),
        ('h', 'i'), ('i', 'h'), ('g', 'i'), ('i', 'g'), ('f', 'i'),
        ('i', 'f'), ('g', 'f'), ('f', 'g'), ('c', 'h'), ('h', 'c'),
        ('d', 'f'), ('f', 'd'), ('i', 'e'), ('e', 'i'),
    ]
    members = max_clique(graph_utils.make_graph(edge_pairs))
    print([member.key for member in members])
def take_samples(sensor):
    """Collect samples into a timestamped file, then plot and mail the result.

    Python 2 code (print statements; a trailing comma keeps the cursor on
    the same line).  Sampling continues while ``Globals.State`` is truthy.
    If nothing was written the file is removed; otherwise ``make_graph``
    renders a PNG and ``mail`` sends both files to ``NOTIFY_EMAIL``.

    Args:
        sensor: forwarded as-is to ``take_sample``.
    """
    sample_count = 0
    filename = 'data-%s.bin' % datetime.now().strftime("%Y%m%d-%H%M%S")
    filepath = path.join(DATA_DIRECTORY, filename)
    print "Trying to collect samples in %s:" % filename
    with open(filepath, 'wb') as f:
        while Globals.State:
            take_sample(f, sensor)
            sample_count += 1
            # Emit a progress dot once per 100 samples.
            if sample_count % 100 == 0:
                stdout.write(".")
                stdout.flush()
            time.sleep(0.01)
    print
    # If the file is empty, delete it
    # NOTE(review): dividing by 4 suggests 4-byte samples -- confirm.
    samples = stat(filepath).st_size / 4
    print
    if samples == 0:
        print "No data collected"
        remove(filepath)
    else:
        print "Wrote out %d samples to %s" % (samples, filename)
        # Replace the trailing 'bin' with 'png' for the image path.
        pngpath = filepath[:-3] + 'png'
        print "Making %s" % pngpath,
        make_graph(filepath, pngpath)
        print "Done"
        print "Sending mail to %s.." % NOTIFY_EMAIL,
        mail(NOTIFY_EMAIL, "Report - %s" % filename, "Got %d samples for file %s" % (samples, filename), attach=filepath, image=pngpath)
        if STATUS_LED is not None:
            print "Blinking status LED",
            # Blink three times, 0.2 s on and 0.2 s off.
            for i in range(3):
                GPIO.output(STATUS_LED, GPIO.HIGH)
                time.sleep(0.2)
                print ".",
                GPIO.output(STATUS_LED, GPIO.LOW)
                time.sleep(0.2)
            print "."
        print "Done"
def main():
    """Draw the relation graphs and print the Leacock-Chodorow scores.

    Order of work: hypernym graph of "wypadek drogowy", direct and
    second-level hyponyms of "wypadek", the labelled semantic-relation
    graphs for depth 1 and 2, and finally the similarity scores for three
    word pairs (hypernym based, then hyponym based).
    """
    sys.setrecursionlimit(5000)

    root = get_relation("wypadek drogowy", "n", "hypernym", 1)
    nx.draw(graph.make_graph(root), with_labels=True)
    plt.show()

    root = get_relation("wypadek", "n", "hyponym", 1)
    print("direct hyponym")
    for child in root.children:
        print(str(child.id) + " " + "{0}".format(child.synonyms))
    print("2nd level hyponym")
    get_hyponyms(root, 1)

    # Only the depth-2 pass also dumps the edge-label dictionary.
    for depth, dump_labels in ((1, False), (2, True)):
        res = semantic_relations(depth)
        print("\n\n")
        for relation in res:
            print(relation)
        graph2 = graph.make_graph_from_array(res)
        edge_labels = {(rel[0], rel[1]): rel[2] for rel in res}
        if dump_labels:
            print(edge_labels)
        pos = nx.spring_layout(graph2)
        nx.draw(graph2, with_labels=True, pos=pos)
        nx.draw_networkx_edge_labels(graph2, pos=pos, edge_labels=edge_labels)
        plt.show()

    word_pairs = (
        ("szkoda", 2, "wypadek", 1),
        ("kolizja", 2, "szkoda majątkowa", 1),
        ("nieszczęście", 2, "katastrofa budowlana", 1),
    )
    by_hypernym = []
    by_hyponym = []
    for pair in word_pairs:
        by_hypernym.append(leacock_chodorow(*pair, "n", "hypernym"))
        by_hyponym.append(leacock_chodorow(*pair, "n", "hyponym"))

    summary = ("szkoda-wypadek: {0} | kolizja-szkoda majątkowa: {1} | "
               "nieszczęście-katastrofa budowlana: {2}")
    print(summary.format(*by_hypernym))
    print(summary.format(*by_hyponym))
def read_graph(file_path):
    """Parse the file at *file_path* and build the graph it describes.

    The parser yields two sequences of ``(id, capacity)`` pairs and two
    preference lists; capacities from both sides are merged into one map
    and each preference entry is converted to a list.

    Returns:
        The object returned by ``graph.make_graph``.
    """
    with open(file_path, encoding='utf-8', mode='r') as fin:
        side_a, side_b, prefs_a, prefs_b = parser.parse(fin.read())

    # map of the capacities (entries from the second side win on a clash)
    capacities = {**dict(side_a), **dict(side_b)}

    ids_a = {node for node, _ in side_a}
    ids_b = {node for node, _ in side_b}
    prefs_a = [(node, list(choices)) for node, choices in prefs_a]
    prefs_b = [(node, list(choices)) for node, choices in prefs_b]
    return graph.make_graph(ids_a, ids_b, prefs_a, prefs_b, capacities)
def find_path(layer, points, threshold):
    """Find a path ending at the point where *layer* is highest.

    A randomly shifted noise field is used when building the graph.  The
    end point is the entry of *points* with the largest ``layer.get``
    value; start points are tried from the farthest to the nearest.

    Args:
        layer: object whose ``get(x, y)`` scores a point.
        points: list of ``(x, y)`` pairs.  Sorted IN PLACE by distance to
            the chosen end point.
        threshold: passed through to ``graph.make_graph``.

    Returns:
        The first truthy result of ``graph.shortest_path``, or ``None``
        when no start point yields one.
    """
    x = layers.Noise(4).add(layers.Constant(0.6)).clamp()
    x = x.translate(random.random() * 1000, random.random() * 1000)
    x = x.scale(0.01, 0.01)
    g = graph.make_graph(points, threshold, x)
    # Plain single-argument lambdas: tuple-parameter lambdas
    # (``lambda (x, y): ...``) are a syntax error on Python 3.
    end = max(points, key=lambda p: layer.get(p[0], p[1]))
    points.sort(key=lambda p: math.hypot(p[0] - end[0], p[1] - end[1]))
    for start in reversed(points):
        path = graph.shortest_path(g, end, start)
        if path:
            return path
    return None
def find_path(layer, points, threshold):
    """Return a path that ends at the highest-scoring point of *layer*.

    The graph is built over *points* with a randomly translated noise
    field.  *points* is sorted in place by distance to the end point and
    candidates are tried starting with the most distant one.  ``None`` is
    returned when no candidate produces a path.
    """
    noise = layers.Noise(4).add(layers.Constant(0.6)).clamp()
    noise = noise.translate(random.random() * 1000, random.random() * 1000)
    noise = noise.scale(0.01, 0.01)
    g = graph.make_graph(points, threshold, noise)

    def score(point):
        return layer.get(*point)

    end = max(points, key=score)

    def distance_to_end(point):
        return math.hypot(point[0] - end[0], point[1] - end[1])

    points.sort(key=distance_to_end)
    for start in reversed(points):
        found = graph.shortest_path(g, end, start)
        if found:
            return found
    return None
def main_loop():
    """Forever: redraw the graph, upload it, and push the restlessness report.

    Each cycle pushes a plain-text summary of the three counters under
    'data' and a formatted spoken message (with the image link) under
    "and", then sleeps for 20 seconds.
    """
    while True:
        graph.make_graph()
        link = upload("foo.png")
        sd = run().get_counts()

        motion = str(sd[0]/2)
        report_lines = (
            "Motion restlessness:" + motion,
            "Accelerometer restlessness:" + str(sd[1]),
            "Micrphone restlessness:" + str(sd[2]),
        )
        to_email = "".join(entry + "\n" for entry in report_lines)
        data.push(to_email, 'data')

        to_say = ("Last night, you experienced " + motion +
                  " counts of restlessness based on motion tracking")
        data.push(formatter(link, to_say), "and")
        sleep(20)
def could_be_mlst(self, edge_set):
    """Return the leaf count if edge_set could be an MLST, else False.

    "Could be" means: the graph built from edge_set has exactly one
    component and no cycle.
    """
    candidate = graph.make_graph(edge_set)
    candidate.search()
    if candidate.num_of_components != 1:
        return False
    if candidate.has_cycle:
        return False
    return candidate.num_leaves
def read_output_graph(self, in_edge_set):
    """Read one output graph and verify it is a spanning tree of the input.

    The output must contain (non-isolated input nodes - 1) edges, every
    edge must exist in the input and appear once, the node counts must
    match, and the result must be connected and acyclic.

    Args:
        in_edge_set: edge set of the matching input graph.

    Returns:
        ``num_leaves`` of the validated output graph.

    Raises:
        The error built by ``self.exception`` for the first failed check.
    """
    nums = self.read_numbers('Cannot parse the number of edges.', 1)
    Gin = graph.make_graph(in_edge_set)
    Gin.search()
    expected_edges = Gin.num_nodes - 1
    if nums[0] != expected_edges:
        raise self.exception(('Input graph has {0} non-isolated '
                              'nodes, output graph should have {1} edges, '
                              'got {2} instead.').format(Gin.num_nodes,
                                                         expected_edges,
                                                         nums[0]))

    out_edge_set = set()
    num_edges = nums[0]
    for _ in range(num_edges):
        nums = self.read_numbers('Cannot parse the next edge.', 2)
        e = graph.Edge(nums[0], nums[1])
        if e not in in_edge_set:
            raise self.exception(('Edge {0} in the output graph is '
                                  'absent in the input graph.').format(e))
        if e in out_edge_set:
            raise self.exception(('Edge {0} (or its reverse) is '
                                  'duplicated.').format(e))
        out_edge_set.add(e)

    Gout = graph.make_graph(out_edge_set)
    Gout.search()
    if Gout.num_nodes != Gin.num_nodes:
        raise self.exception(('After reading the last edge, the number '
                              'of non-isolated nodes in the output graph ({0}) '
                              'should equal that of the input graph ({1}) to be '
                              'a spanning tree.').format(Gout.num_nodes,
                                                         Gin.num_nodes))
    # Call the method: testing the bound method object itself is always
    # truthy, so the connectivity check could never fail before.
    if not Gout.edges_in_one_component():
        raise self.exception('Disconnected graph: after reading the last edge, the output graph should be connected to be a spanning tree.')
    if Gout.has_cycle:
        raise self.exception('Cycle detected: after reading the last edge, the output graph should not have cycles to be a spanning tree.')
    return Gout.num_leaves
def __init__(self, max_nodes, degree):
    """Start from a star around vertex 0 with leaves 1..degree.

    The star is kept twice: ``edges`` (working set) and ``base_edges``
    (an independent copy).  A searched graph of the star and an
    ``NbitGenerator`` for (max_nodes, degree) are stored as well.
    """
    self.degree = degree
    self.max_nodes = max_nodes

    star = set(self.get_star(0, range(1, degree + 1)))
    self.edges = star
    self.base_edges = star.copy()
    self.num_vertices = self.degree + 1

    self.graph = graph.make_graph(self.edges)
    self.graph.search()
    self.nbg = NbitGenerator(max_nodes, degree)
def __init__(self, max_nodes, degree):
    """Set up the generator with the "base cubit" as its starting edges.

    ``get_base_edges`` is called on an empty ``base_edges`` set; ``edges``
    then starts out as a copy of whatever ``base_edges`` holds afterwards,
    and a searched graph of those edges is stored in ``graph``.
    """
    self.degree, self.max_nodes = degree, max_nodes

    # base_edges must exist before get_base_edges() runs.
    self.base_edges = set()
    self.get_base_edges()
    self.edges = self.base_edges.copy()

    self.num_vertices = self.degree + 1
    self.graph = graph.make_graph(self.edges)
    self.graph.search()
def test():
    """Check the distances ``bellman_ford`` computes from vertex 1."""
    w = {
        (1, 2): 7,
        (1, 3): 9,
        (1, 6): 14,
        (2, 4): 15,
        (2, 3): 10,
        (6, 3): 2,
        (6, 5): 9,
        (3, 4): 11,
        (5, 4): 6,
    }
    weights = graph.weights(w)
    g = graph.make_graph(w.keys())
    bellman_ford(g, weights, g.vertices[1])

    # (vertex key, expected distance), checked in key order.
    expected = ((1, 0), (2, 7), (3, 9), (4, 20), (5, 23), (6, 14))
    for key, distance in expected:
        assert g.vertices[key].d == distance
    print('test passes')
def update_tables(scores, data):
    """Rebuild the lambda tables, post them, and update the wiki pages.

    Args:
        scores: rows indexed as ``(username, lambda, help given)``.
        data: statistics rows; the whole sequence is graphed and the last
            row's items 1..3 fill the totals table.

    Side effects (in this order): renders and uploads the graph image,
    uploads it to the stylesheet as "wikigraph", submits a post to the
    bot's profile and replies to it with the tables, then edits the
    "lambdatables", dated and "index" wiki pages.
    """
    content = ""
    date = str(datetime.date.today())
    mods = get_mods()
    imagepath = graph.make_graph(data)
    imageurl = upload_image(imagepath, date)

    # Top ten by lambda excludes moderators; top ten by help includes them.
    bylambda = [
        i for i in sorted(scores, key=itemgetter(1), reverse=True)
        if i[0] not in mods
    ][:10]
    byhelps = sorted(scores, key=itemgetter(2), reverse=True)[:10]
    subreddit.SUBREDDIT.stylesheet.upload("wikigraph", imagepath)

    content += "\n\n##/r/SmallYTChannel lambda tables: %s" % date
    content += "\n\n###By lambda:"
    content += "\n\nUsername|Lambda|Help given\n:--|:--|:--"
    for line in bylambda:
        content += "\n/u/%s|%i|%i" % (line[0], line[1], line[2])

    content += "\n\n###By Help given:"
    content += "\n\nUsername|Lambda|Help given\n:--|:--|:--"
    for line in byhelps:
        # Moderators are shown with an infinity sign instead of a number.
        λ = str(line[1])
        if line[0] in mods:
            λ = "∞"
        content += "\n/u/%s|%s|%i" % (line[0], λ, line[2])

    # The trailing space after "then" matters: the next piece is appended
    # directly and previously produced "thenthe most up-to-date ...".
    content += "\n\n##Statistics from %s:\n\nIf you're looking at this through the wiki, not through the bot's profile, then " % (
        date)
    content += "the most up-to-date graph will be shown below. To see the graph at this date, follow [this link.](%s)" % (
        imageurl)
    content += "\n\n![](%%%%wikigraph%%%%)\n\nTotal λ in circulation|Useful advice given|Unique users\n:--|:--|:--\n%i|%i|%i" % (
        data[-1][1], data[-1][2], data[-1][3])

    subreddit.REDDIT.subreddit("u_SmallYTChannelBot").submit(
        "/r/SmallYTChannel Statistics: %s" % date,
        url=imageurl).reply(content).mod.distinguish(sticky=True)
    subreddit.SUBREDDIT.wiki["lambdatables"].edit(content, reason="Update: %s" % date)
    subreddit.SUBREDDIT.wiki[date].edit(content, reason="Update: %s" % date)

    # Append a link to today's page at the end of the wiki index.
    currentdata = subreddit.SUBREDDIT.wiki["index"].content_md
    currentdata += "\n\n* [%s](/r/SmallYTChannel/wiki/%s)" % (date, date)
    subreddit.SUBREDDIT.wiki["index"].edit(currentdata, reason="Update: %s" % date)
def test():
    """Print a sample flow network before and after ``ford_fulkerson``
    is run on it from 's' to 't'."""
    capacities = [
        ('s', 'v1', 16),
        ('v1', 'v3', 12),
        ('s', 'v2', 13),
        ('v2', 'v1', 4),
        ('v3', 'v2', 9),
        ('v2', 'v4', 14),
        ('v4', 't', 4),
        ('v4', 'v3', 7),
        ('v3', 't', 20),
    ]
    edges = {(tail, head): Edge(tail, head, capacity)
             for tail, head, capacity in capacities}

    g = graph_utils.make_graph(edges.keys())
    source, sink = g.vertices['s'], g.vertices['t']

    print_edges(edges)
    print('After Ford-Fulkerson')
    ford_fulkerson(g.vertices, edges, source, sink)
    print_edges(edges)
def do_test(outfile, testfile):
    """Apply previously learned output to a test file and report the results.

    Python 2 code (print statements).

    Args:
        outfile: file read with ``machine.read_out``.
        testfile: input file; it is first passed through ``util.spread``
            unless its name already contains '_spread'.
    """
    if not '_spread' in testfile:
        print 'Spreading file...\n'
        testfile = util.spread(testfile)
    print 'Making graph...'
    g = graph.make_graph(testfile)
    procs = g['procedures']
    print '\nReading output...'
    learned = machine.read_out(outfile)
    print '\nApplying to input file...'
    results = machine.guess(learned, procs)
    machine.summarize(results)
    machine.matrix(results)
def test():
    """Run ``ford_fulkerson`` on a small network, printing the edges
    before and after."""
    network = (
        (('s', 'v1'), 16),
        (('v1', 'v3'), 12),
        (('s', 'v2'), 13),
        (('v2', 'v1'), 4),
        (('v3', 'v2'), 9),
        (('v2', 'v4'), 14),
        (('v4', 't'), 4),
        (('v4', 'v3'), 7),
        (('v3', 't'), 20),
    )
    edges = {}
    for endpoints, capacity in network:
        edges[endpoints] = Edge(endpoints[0], endpoints[1], capacity)

    g = graph_utils.make_graph(edges.keys())
    source = g.vertices['s']
    sink = g.vertices['t']

    print_edges(edges)
    print('After Ford-Fulkerson')
    ford_fulkerson(g.vertices, edges, source, sink)
    print_edges(edges)
def find_mlst(self, input_edge_set):
    """ Exhaustive search over the candidates of <input_edge_set>.

    Every edge set yielded by ``get_edge_permutation`` is turned into a
    graph; among those with a single component, the one with the most
    leaves wins (the first one found on a tie).  An empty set is returned
    when no candidate qualifies.
    """
    top_leaves = 0
    top_edges = set()
    for candidate in self.get_edge_permutation(input_edge_set):
        g = graph.make_graph(candidate)
        g.search()
        if g.num_of_components != 1:
            continue
        if g.num_leaves > top_leaves:
            top_leaves = g.num_leaves
            top_edges = candidate
    return top_edges
def main(filename):
    """Interactive shortest-path session over the graph stored in *filename*.

    Python 2 code (print statements, ``raw_input``).  Picks the algorithm
    from the graph's shape: DAG shortest paths when the topological sort
    reports no back edges, Dijkstra when there are no negative weights,
    Bellman-Ford otherwise.  Then answers destination queries forever.

    Args:
        filename: path handed to ``get_lines``.
    """
    # NOTE(review): the handle ``f`` is not used; get_lines receives the
    # file name itself.
    with open(filename, 'r') as f:
        edges = parse(get_lines(filename))
    graph = make_graph(edges)
    source = raw_input("Source vertex:")
    while not in_graph(graph, source):
        source = raw_input("Bad source\nSource vertex:")
    vec, back_edges = topological_sort(graph)
    # Any negative weight rules out Dijkstra.
    negs = False
    for _, _, w in edges:
        if w < 0:
            negs = True
    cycle = len(back_edges) > 0
    if not cycle:
        print "Graph is a DAG, running DAG SP algorithm"
        d, parent = dag_sp(graph, vec, source)
    elif not negs:
        print "Graph has no negative edges, running Dijkstra's algorithm"
        d, parent = dijkstra(graph, source)
    else:
        print "Running Bellman-Ford algorithm"
        d, parent = bellman_ford(graph, source)
        if d == None: ## Sufficent to assume parent is also None
            print "Graph contains a negative-weight cycle."
            sys.exit(0)
    # Query loop: there is no exit condition inside this function.
    while True:
        dest = raw_input("Destination vertex:")
        while not in_graph(graph, dest):
            dest = raw_input("Bad destination\nDestination vertex:")
        dist, order = get_shortest_path(d, parent, dest)
        if dist >= INFINITY:
            print "There is no way to reach the destination from the source"
        else:
            print "The distance between the vertices is %d" % dist
            print "The shortest path is %s" % pprint_path(order)
def make_junto(filename, title, folder='.', threshold=0, ret=False):
    """Build the junto files and a config file for one input file.

    Python 2 code (print statements).

    Args:
        filename: input file; passed through ``util.spread`` first unless
            its name already contains '_spread'.
        title: passed to ``graph.to_junto``; the config is written to
            ``title + '_config'``.
        folder: substituted into the config template.
        threshold: when truthy, the graph's 'procedures' entry is replaced
            by ``util.trim(..., threshold)``.
        ret: when truthy, the graph is returned.

    Returns:
        The graph when *ret* is truthy, otherwise ``None``.
    """
    if not '_spread' in filename:
        print 'Spreading file...\n'
        filename = util.spread(filename)
    print 'Making graph...'
    g = graph.make_graph(filename)
    if threshold:
        print '\nTrimming...'
        g['procedures'] = util.trim(g['procedures'], threshold)
    print '\nConstructing junto files...'
    graph.to_junto(g, title)
    # ``config`` is a %-template defined outside this function.
    with open(title + '_config', 'w') as f:
        f.write(config % {'title': title, 'folder': folder})
    print '\n...Done'
    if ret:
        return g
def update_tables(scores, data):
    """Rebuild the Doot Coin tables and write them to the subreddit wiki.

    Args:
        scores: rows indexed as ``(username, coins, help given)``.
        data: statistics rows; the whole sequence is graphed and the last
            row's items 1..3 fill the totals table.

    Side effects (in this order): renders the graph image, uploads it to
    the stylesheet as "wikigraph", then edits the "lambdatables", dated and
    "index" wiki pages.
    """
    content = ""
    date = str(datetime.date.today())
    mods = get_mods()
    imagepath = graph.make_graph(data)
    # Top ten by coins excludes moderators; top ten by help includes them.
    bylambda = [
        i for i in sorted(scores, key=itemgetter(1), reverse=True)
        if i[0] not in mods
    ][:10]
    byhelps = sorted(scores, key=itemgetter(2), reverse=True)[:10]
    SUBREDDIT.stylesheet.upload("wikigraph", imagepath)
    content += "\n\n##/r/YouTuber_initiative Doot Coin tables: %s" % date
    content += "\n\n###By Doot Coin:"
    content += "\n\nUsername|Doot Coin|Help given\n:--|:--|:--"
    for line in bylambda:
        content += "\n/u/%s|%i|%i" % (line[0], line[1], line[2])
    content += "\n\n###By Help given:"
    content += "\n\nUsername|Doot Coin|Help given\n:--|:--|:--"
    for line in byhelps:
        # Moderators are shown with an infinity sign instead of a number.
        λ = str(line[1])
        if line[0] in mods:
            λ = "∞"
        content += "\n/u/%s|%s|%i" % (line[0], λ, line[2])
    content += "\n\n![](%%%%wikigraph%%%%)\n\nTotal Doot Coins in circulation|Useful advice given|Unique users\n:--|:--|:--\n%i|%i|%i" % (
        data[-1][1], data[-1][2], data[-1][3])
    SUBREDDIT.wiki["lambdatables"].edit(content, reason="Update: %s" % date)
    SUBREDDIT.wiki[date].edit(content, reason="Update: %s" % date)
    # Append a link to today's page at the end of the wiki index.
    currentdata = SUBREDDIT.wiki["index"].content_md
    currentdata += "\n\n* [%s](/r/YouTuber_Initiative/wiki/%s)" % (date, date)
    SUBREDDIT.wiki["index"].edit(currentdata, reason="Update: %s" % date)
def __init__(self):
    """Load every parsed table the generator works from.

    ``I`` and ``C`` hold the distinct first items of the initials and
    closings rows.  The action graph is built from the 'pairs' and 'links'
    entries of the parsed action pairs.  Loading the word2vec model is
    best-effort: if it fails, ``gs`` is simply left unset.
    """
    self.initials = parse.parse_initials()
    self.I = list(set([i[0] for i in self.initials]))
    self.closings = parse.parse_closings()
    self.C = list(set([i[0] for i in self.closings]))

    ret = parse.parse_midpoints()
    self.midpoints = ret['chains']
    self.midpoint_ex = ret['exemplars']

    self.action_pairs = parse.parse_pairs()
    self.noc = parse.parse_NOC()
    self.exemplars = parse.parse_exemplars()
    self.action_graph = graph.make_graph(self.action_pairs['pairs'],
                                         self.action_pairs['links'])
    self.idiomatics = parse.parse_idiomatics()
    self.locations = parse.parse_locations()
    self.character_templates = templates.CHARACTER_DESCRIPTIONS
    self.location_templates = templates.SETTING_DESCRIPTIONS
    self.character_properties = parse.parse_character_properties()

    try:
        self.gs = GS(Word2Vec.load("../data/word2vec/w2v_103.model"))
    except Exception:
        # Deliberately best-effort (model file may be missing).  Narrowed
        # from a bare ``except:`` so that KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        pass
def __init__(self, raw_graph):
    """Build the graph from *raw_graph* and initialise the Dijkstra base.

    ``make_graph`` yields three values, which are forwarded in the same
    order to ``Dijkstra.__init__``.
    """
    built_graph, first, second = make_graph(raw_graph)
    Dijkstra.__init__(self, built_graph, first, second)
def today_total(line_token: str, save_dir: str) -> None:
    '''
    Post the daily totals when the reported date has changed.

    - Compare the previously saved data with the latest data; act only when
      the date differs.
    - Compute the increase in positive cases since the saved data.
    - Collect the other figures for the message.
    - Draw the increase as a graph and save it; save the metadata as JSON.
    - Post the graph and the figures to LINE.

    Args:
        line_token (str): LINE notify access token
        save_dir (str): path of the directory that holds temporary data
    '''
    try:
        body = get_requests(
            'https://covid19-japan-web-api.now.sh/api/v1/total')
    except Exception as error:
        print(f'error:{error.args}')
        return
    day = body['date']
    save_file_path = os.path.join(save_dir, 'save.json')
    daily_infections = os.path.join(save_dir, 'daily.json')
    graph_image_path = os.path.join(save_dir, 'graph_dayly.png')
    if os.path.isfile(save_file_path):
        old_body = json_read(save_file_path)
        old_day = old_body['date']
        difference = body['positive'] - old_body['positive']
    else:
        # First run: nothing saved yet, so report a zero increase.
        old_day = None
        difference = 0
    if day != old_day:
        if os.path.isfile(daily_infections):
            daily = json_read(daily_infections)
        else:
            daily = []
        day_obj = datetime.datetime.strptime(str(day), r'%Y%m%d')
        daily.append({'date': day, 'positive': difference})
        # NOTE(review): the message literal is kept exactly as it appears in
        # this source; verify its line layout against the original file.
        text = f''' {day_obj.month}月{day_obj.day}日 ☣感染者: {body['positive']}人 (前日比: {difference:+}) - 退院: {body['discharge']}人 - 入院中: {body['hospitalize']}人 * 軽中度・無症状: {body['mild']}人 * 人工呼吸・ICU: {body['severe']}人 * 確認中: {body['confirming']}人 * 待機中: {body['waiting']}人 - 死亡: {body['death']}人 source by: https://covid-2019.live/ '''
        make_graph(daily, graph_image_path,
                   'Number of infected persons per day')
        post_line(line_token, text, graph_image_path)
        print(text)
        print('-' * 30)
        print('\n\n')
        # Persist both the per-day history and the latest snapshot.
        json_write(daily, daily_infections)
        json_write(body, save_file_path)
def make_graph(plistA, plistB):
    """Build a graph from two preference lists, giving every node capacity 1.

    The node sets are the first items of the entries of each list.
    """
    side_a = {entry[0] for entry in plistA}
    side_b = {entry[0] for entry in plistB}

    unit_capacities = {node: 1 for node in side_a}
    unit_capacities.update({node: 1 for node in side_b})
    return graph.make_graph(side_a, side_b, plistA, plistB, unit_capacities)
def dfs_visit(s, g):
    """Recursively visit every vertex reachable from *s* via ``s.edges``.

    A vertex counts as unvisited when it is missing from ``parent`` or
    mapped to ``None`` there; on first visit its parent entry and its
    ``predecessor`` attribute are both set to the vertex it was reached from.

    NOTE(review): ``parent`` is not defined in this function -- it must come
    from an enclosing or module scope that is not visible here.

    Args:
        s: vertex to expand; iterating ``s.edges`` yields neighbour vertices.
        g: only forwarded to the recursive call.
    """
    for v in s.edges:
        if v not in parent or parent[v] is None:
            parent[v] = s
            v.predecessor = s
            dfs_visit(v, g)


if __name__ == '__main__':
    # Demo 1: depth-first search from vertex 'a'.
    g = graph.make_graph([
        ('y', 'z'),
        ('a', 'b'),
        ('a', 'd'),
        ('b', 'c'),
        ('c', 'd'),
        ('b', 'y'),
        ('c', 'a'),
        ('d', 'y'),
    ])
    dfs(g.vertices['a'])
    # Demo 2: whole-graph variant, then dump every (key, vertex) pair.
    g = graph.make_graph([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'a'), ('e', 'f'), ('f', 'g'), ('g', 'c')])
    dfs_2(g)
    for vertex in g.vertices.items():
        print(vertex)
    # Demo 3: cycle detection on a freshly built copy of the same graph.
    g = graph.make_graph([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'a'), ('e', 'f'), ('f', 'g'), ('g', 'c')])
    print('has cycle: ' + str(cycle_detection(g)))
def get_cycle(spanning_tree, edge):
    """Add *edge* to *spanning_tree* and return the cycle path found for it.

    Note that *spanning_tree* is modified in place: the edge stays in the
    set after the call.

    Returns:
        The result of ``search_for_cycle_path(edge)`` (list of edges in
        the cycle).
    """
    spanning_tree.add(edge)
    import graph as graph_module
    augmented = graph_module.make_graph(spanning_tree)
    return augmented.search_for_cycle_path(edge)
def test1():
    """Print KDE predictions for a good node, its namesakes, and a bad node.

    Python 2 code (print statements, ``iteritems``).  Relies on names
    defined outside this function: ``pnode_good``, ``version_good``,
    ``pnode_bad``, ``version_bad``, ``good`` and ``bad``.  The KDEs are
    built from the good graph only and reused to score the bad node.
    """
    node_good = (pnode_good, version_good)
    node_bad = (pnode_bad, version_bad)
    dg_good, nodes_good = graph.make_graph(good)
    good_kdes = histogram.make_kdes(dg_good)
    good_vals = histogram.kde_predict_all(good_kdes, dg_good, node_good)
    name = histogram.get_name(dg_good, node_good)
    # extract all nodes with same name:
    proc_nodes = []
    proc_vals = {}
    for node_num, t in nodes_good.iteritems():
        for v in t:
            test_node = (node_num, v)
            gnode_name = histogram.get_name(dg_good, test_node)
            if gnode_name == name:
                proc_nodes.append(test_node)
                vals = histogram.kde_predict_all(good_kdes, dg_good, test_node)
                proc_vals[test_node] = vals
    for (k,v) in sorted(good_kdes[name].items()):
        print k, v[1] if v else v
    print
    print
    print ">>>>>GOOD ON GOOD"
    for (k,v) in sorted(good_vals.items()):
        print k,v
    # Per key: the smallest value over all same-named nodes, and which
    # node produced it.
    mins = {}
    min_nodes = {}
    print ">>>>>GOOD ON GOOD (on %d nodes)" % len(proc_nodes)
    for node, vals in proc_vals.items():
        for (k,v) in sorted(vals.items()):
            if not k in mins:
                mins[k] = v
                min_nodes[k] = node
            elif v < mins[k]:
                mins[k] = v
                min_nodes[k] = node
    for (k,v) in sorted(mins.items()):
        print k,v,min_nodes[k], histogram.get_vals(dg_good, min_nodes[k])
    print
    print ">>>>>BAD ON GOOD"
    dg_bad, nodes_bad = graph.make_graph(bad)
    bad_on_good_vals = histogram.kde_predict_all(good_kdes, dg_bad, node_bad)
    for (k,v) in sorted(bad_on_good_vals.items()):
        print k,v
    print
    print ">>>>>COMPARISON"
    # good - bad per key; None when either side has no value.
    diffs = {}
    for k in good_vals:
        if good_vals[k] is not None and bad_on_good_vals[k] is not None:
            diffs[k] = good_vals[k] - bad_on_good_vals[k]
        else:
            diffs[k] = None
    for (k,v) in sorted(diffs.items()):
        print k,v
for v in u.edges: if v not in level: level[v] = i parent[v] = u next.append(v) frontier = next i += 1 return (level, parent) if __name__ == '__main__': g = graph.make_graph([ ('y', 'z'), ('a', 'b'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('b', 'y'), ('c', 'a'), ('d', 'y'), ]) bfs(g.vertices['a']) g = graph.make_graph([ ('y', 'z'), ('a', 'b'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('b', 'y'), ('c', 'a'), ('d', 'y'), ])
def repl_loop(modules, debug=False, do_eval=False):
    """Run the interactive oFrugal read-eval-print loop.

    Args:
        modules: list of open file objects.  Lines are read from the first
            one until it is exhausted; it is then reported, closed and
            removed.  Once the list is empty, input comes from the prompt.
            The ``include`` command appends further files to this list.
        debug: when true, print the workspace after assignments and the
            ``repr`` of evaluated results.  Toggled by the ``debug`` command.
        do_eval: when true, pass plain expressions through ``miser.eval``
            before printing.  Toggled by the ``eval`` command.

    The loop ends when the prompt receives end-of-file (Ctrl-D).
    """
    print("oFrugal shell for oMiser computational model")
    print("Press Ctrl-D to leave.")
    workspace = miser.namespace
    workspace.update(library.namespace)

    def completer(text, state):
        # readline calls this with state = 0, 1, 2, ... and stops at the
        # first non-string result, so running past the end must yield None.
        if text.startswith("^"):
            completions = [
                "^" + o + " "
                for o in workspace
                if not o.startswith(".")
                and not o.endswith("(")
                and ("^" + o).startswith(text)
            ]
        else:
            completions = [
                o + " "
                for o in workspace
                if not o.endswith("(") and o.startswith(text)
            ]
        if state < len(completions):
            return completions[state]
        return None

    readline.set_completer(completer)

    while True:
        # Pending include files take priority over interactive input.
        line = None
        while len(modules) > 0:
            line = modules[0].readline()
            if line:
                break
            to_close = modules.pop(0)
            print(("Read: {}".format(to_close.name)))
            to_close.close()

        if line:
            s = line
        else:
            try:
                s = input("\noFrugal> ")
            except EOFError:
                print("\nBye!")
                break

        if not s.strip():
            continue

        # The result of this first parse is not used yet; the call only
        # rejects input the grammar cannot tokenize.
        try:
            parsed = ofrugal_parser.parse(s)
        except UnexpectedCharacters as x:
            print("\nParser error:", x)
            continue
        # TODO: properly integrate the parsed tree and Interpretation into
        # the shell.
        I = Interpretation(workspace)

        try:
            statements = frugal_to_tree(s, workspace)
        except (ParseError, VisitationError) as exc:
            print(("Parsing error: {}".format(exc)))
            continue

        graph = False
        if isinstance(statements, Command):
            if statements.name == "include":
                file_name = statements.arguments.strip('"')
                try:
                    modules.append(open(file_name, "r"))
                except IOError as x:
                    print(("ERROR: {}".format(x)))
                continue
            elif statements.name == "debug":
                debug = not debug
                print(("Debug now {}".format(["OFF", "ON"][debug])))
                continue
            elif statements.name == "eval":
                do_eval = not do_eval
                print(("Implicit eval now {}".format(["OFF", "ON"][do_eval])))
                continue
            elif statements.name == "graph":
                graph = True
                if not statements.arguments.strip():
                    print("Empty input.")
                    continue
                # The argument is user input as well: report a bad
                # expression instead of letting it end the shell.
                try:
                    statements = frugal_to_tree(statements.arguments,
                                                workspace)
                except (ParseError, VisitationError) as exc:
                    print(("Parsing error: {}".format(exc)))
                    continue
        elif statements and isinstance(statements[0], Equation):
            if len(set(eq.varname for eq in statements)) != 1:
                print("ERROR: different variables")
                continue
            varname = statements[0].varname
            rules = [(eq.args, eq.result) for eq in statements]
            try:
                solutions = solve(rules)
            except TypeError as x:
                print("\nInternal error:", x)
                continue
            except KeyboardInterrupt:
                print("Aborted.")
                continue
            if solutions:
                workspace[varname] = solutions[-1]
                if debug:
                    pprint(workspace)
            else:
                print("No solution found, more calculations needed.")
            continue

        if not all(good_statement(x) for x in statements):
            print(("ERROR: Ob expected, found: {}".format(statements)))
            continue

        if graph:
            make_graph(statements, DOT_FILE_PATH)
            print(("Graphviz file written to {}".format(DOT_FILE_PATH)))
            continue

        for s in statements:
            # A tuple is an assignment: (target name, value).
            to_var = None
            if isinstance(s, tuple):
                to_var, s = s
                print(("{} = {}".format(to_var, str(s))))
            if to_var is not None:
                workspace[to_var] = s
                if debug:
                    pprint(workspace)
            else:
                if do_eval:
                    evaluated = miser.eval(s)
                else:
                    evaluated = s
                print(("{}".format(str(evaluated))))
                if debug:
                    print(("\nOUTPUT: {}".format(repr(evaluated))))
                    print(("\nOUTPUT STATE: {}".format(
                        repr(evaluated.__getstate__()))))
if frame % 10 == 0: data.append([]) for event in pygame.event.get(): if event.type == pygame.QUIT: running = False pass for x in nodes: if frame % 10 == 0: data[int(frame / 10) - 1].append([x.infected, x.recovered, x.died]) x.move() if x.infected: pygame.draw.circle(screen, (255, 0, 0), [int(x.x), int(x.y)], 2) x.update(nodes, frame) elif x.recovered: pygame.draw.circle(screen, (100, 100, 100), [int(x.x), int(x.y)], 2) elif x.died: pygame.draw.circle(screen, (0, 0, 255), [int(x.x), int(x.y)], 2) else: pygame.draw.circle(screen, (255, 255, 255), [int(x.x), int(x.y)], 2) pygame.display.flip() pygame.time.wait(10) print("Generating graph...") final = graph.make_graph(data, healthy=tot) graph.base_graph(final, tot)
'input_datafile'] + "_{0}.csv" save_csv(result_path.format("coords"), coords_csv) save_csv(result_path.format("orders"), orders_csv) save_csv(result_path.format("executions"), execution_csv) # step5.2 : optional graph generation if config["results"]["graph"]["make"]: print(f"{datetime.now().time()} - Generate graphs...\n") inputs = (config['results']['graph']['nb_max'], config["input_datafile"], config["results"]["graph"]["show_names"], config["results"]["graph"]["link_vertices"], config["results"]["graph"]["map_background"], config["results"]["graph"]["gif_mode"], config["results"]["graph"]["fps"], config["back_to_origin"]) files = make_graph(path, local_data, to_compute, results_gen, results_opti, opti_names, *inputs) # step6.1 : send results online if online: print(f"{datetime.now().time()} - Upload data...\n") if config["results"]["graph"]["make"]: drive.upload_imgs(config["results"]["graph"]["gif_mode"]) drive.upload_csv(orders_csv, coords_csv, execution_csv, config["path_generation"]["nb_process"]) # step6.2 : optional cleaning if config["results"]["graph"]["make"] and \ not config["results"]["keep_local"]: print(f"{datetime.now().time()} - Clear directory...\n") [os.remove(file) for file in files if os.path.exists(file)]
visit(v) for vertex in v.edges: if vertex not in visited and vertex not in queue: queue.append(vertex) def bfs_2(s): level = {s : 0} parent = {s : None} i = 1 frontier = [s] while frontier: next = [] for u in frontier: for v in u.edges: if v not in level: level[v] = i parent[v] = u next.append(v) frontier = next i += 1 return (level, parent) if __name__ == '__main__': g = graph.make_graph([('y','z'), ('a','b'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('b', 'y'), ('c', 'a'), ('d', 'y'), ]) bfs(g.vertices['a']) g = graph.make_graph([('y','z'), ('a','b'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('b', 'y'), ('c', 'a'), ('d', 'y'), ]) level , parent = bfs_2(g.vertices['a']) print(level) print(parent)
#!/usr/bin/env python
"""Topologically sort the graph described in the file named on the command line.

Prints the resulting order, or the back edges when the graph has a cycle.
Exits with status 1 when the argument is missing or is not an existing file.
"""
from input import get_lines, parse
from graph import make_graph, topological_sort
import sys
from os.path import isfile

# A missing argument used to surface as a bare IndexError traceback.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s FILE\n" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]
if not isfile(filename):
    sys.stderr.write("%s: not a file\n" % filename)
    sys.exit(1)

lines = get_lines(filename)
edges = parse(lines)
graph = make_graph(edges)
path, back_edges = topological_sort(graph)

# Single-argument print(...) yields the same output on Python 2 and 3.
if not back_edges:
    print("The path is: " + str(path))
else:
    print("The graph is cyclic. Back edges: " + str(back_edges))
def test_graph(cost, source):
    """Run ``dijkstra`` from ``source`` and print every vertex afterwards.

    ``cost`` is first converted with ``graph_utils.weights``; the keys of the
    converted mapping are the edges the graph is built from, and
    ``source`` is the key of the start vertex in ``g.vertices``.
    """
    weights = graph_utils.weights(cost)
    g = graph_utils.make_graph(weights.keys())
    start = g.vertices[source]
    dijkstra(g, weights, start)
    for vertex in g.vertices.values():
        print(vertex)
import datetime
import json
import urllib.request
from pathlib import Path

import pandas as pd

from graph import make_graph

target_filename = Path("election_data_SG_GA.csv")

# Fetch data; the context manager closes the HTTP response once it is read.
cnn_url = "https://politics-elex-results.data.api.cnn.io/results/view/2020-SG-GA.json"
with urllib.request.urlopen(cnn_url) as response:
    contents = response.read()
record = json.loads(contents.decode("utf8"))

# Assemble one row: the fetch time plus one column per candidate last name.
row = {"time": datetime.datetime.now()}
for candidate in record["candidates"]:
    row[candidate["lastName"]] = candidate["voteNum"]
new_rows = pd.DataFrame([row])

# Append to the stored history.  DataFrame.append was removed in pandas 2.0;
# pd.concat gives the same result on old and new versions.
if target_filename.exists():
    df = pd.concat([pd.read_csv(target_filename), new_rows])
else:
    df = new_rows

# Save dataframe
df.to_csv(target_filename, index=False)

# Save graph
make_graph()
def load(screen_name=None, user_id=None, force_db_update=False,
         force_twitter_update=False, debug=False):
    '''
    Main entry point into gravitty module. Should be used by importing
    gravitty and calling gravitty.load('<your_screen_name>').

    Please see the readme at github.com/ericjeske/gravitty for mandatory
    setup instructions and api requirements.

    The load function will make every attempt to load data from cache
    sources (mongoDB) before using twitter's api. It is, however, suggested
    that multiple twitter api keys are utilized with this app to avoid rate
    limiting restrictions.

    By default, running this function will return a json object that can be
    parsed by d3.js to create a community graph. Additional information,
    including the raw twitter data, parsed twitter data, user similarity,
    community clustering dendrogram, community analytics data, community
    networkx graph, and community json object, can be returned by passing in
    debug=True.

    Also, by default, this app will create two pickled objects, one
    containing the debug data described above, the other containing the
    community json file. Subsequent calls for the same user will use this
    data to save time (and api calls).

    To override the use of pickled data, use force_db_update = True. Data
    for each follower will be pulled from mongoDB if possible, otherwise it
    will be pulled from twitter.

    To do a clean-slate download, downloading everything from twitter, use
    force_twitter_update = True.

    Raises:
        Exception('Please enter an id or name'): neither screen_name nor
            user_id was given.
        Exception('DBError'): the MongoDB connection could not be created.
        Exception('TargetError'): no data could be obtained for the user.
    '''
    if screen_name is None and user_id is None:
        raise Exception('Please enter an id or name')

    # Assume that if screen_name was not provided (only user id) then a
    # pickle has not been created.
    if screen_name is not None:
        ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
        sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
        sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

        # Check to see if there are pickles for the user. Either force flag
        # bypasses them.
        # NOTE: pickle.load executes whatever the file contains; these files
        # are expected to be the ones this function wrote itself.
        use_pickles = not force_twitter_update and not force_db_update
        if use_pickles and debug and os.path.isfile(sn_file_debug):
            with open(sn_file_debug, 'rb') as pkl:
                return pickle.load(pkl)
        if use_pickles and os.path.isfile(sn_file):
            with open(sn_file, 'rb') as pkl:
                return pickle.load(pkl)

    # Use api credentials from files located in the API_PATH.
    ABS_API_PATH = os.path.join(os.path.dirname(__file__), API_PATH)
    apis = oauth_login(ABS_API_PATH)

    # Try to start up a mongo database connection to cache data in
    try:
        conn = pymongo.MongoClient("localhost", 27017)
    except pymongo.errors.ConnectionFailure:
        print('Please run mongod and re-run program')
        raise Exception('DBError')

    # Everything that needs the database runs inside try/finally so the
    # connection is released on the error paths too.
    try:
        db = conn[DB_NAME]

        # Get the target user's data from either the screen_name or user_id
        user_data = get_user_data(db, apis[0], name=screen_name,
                                  uid=user_id, force=force_twitter_update)

        # If the user is protected (or has more than the maximum
        # followers/friends), then return an error
        if user_data is None:
            print('Was unable to access data for %s / %s'
                  % (screen_name, user_id))
            raise Exception('TargetError')

        user_info, user_tweets, followers, following, user_lists = user_data

        # Using the target user's list of followers (user ids), get the same
        # information we just got for the target user for each of its
        # followers
        raw_df = get_follower_data(db, apis, followers,
                                   force=force_twitter_update)

        # Filter the dataframe for inactive users. Then parse the raw
        # dataframe to extract the relevant features from the raw data
        df = parse_dataframe(filter_dataframe(raw_df))

        # With the features in hand, calculate the latent similarity between
        # each set of users. See similarity.py for more detail on the
        # calculations of this similarity metric.
        # The resulting dataframe will be a square matrix indexed/columned by
        # user_id and contain the undirected edge weights between each pair
        # of users.
        df_similarity = make_similarity_dataframe(df)

        # Make an undirected graph representing the relationship between
        # each user, if any. Each node ID is the user ID, each edge weight is
        # equal to the similarity score between those two users.
        graph = make_graph(df, df_similarity)

        # Using the louvain method, find communities within the weighted
        # graph. The returned dendrogram is a list of dictionaries where the
        # values of each dictionary are the keys of the next dictionary. The
        # length of the dendrogram indicates the number of levels of
        # community clusters detected.
        dendrogram = generate_dendrogram(graph)

        # Add a final mapping to the dendrogram that maps everyone into the
        # same community. They are, after all, followers of the same user.
        dendrogram.append({k: 0 for k in dendrogram[-1].values()})

        # Modify the dataframe to contain columns titled 'cid + <level>'.
        # Each column contains the community id's for that level for each
        # user. Also, this is a convenient time to calculate graph modularity
        # at each level so produce that here as well.
        df, modularity = get_community_assignment(df, graph, dendrogram)

        num_levels = len(dendrogram)

        # For each community at each level of the dendrogram, find the
        # topics, sentiment, biggest influencers, etc. for each.
        data = get_community_analytics(df, graph, num_levels,
                                       community_modularity=modularity)

        # Both the mentioned and most connected users fields from the
        # community analytics function are user ids. Turn them into screen
        # names.
        data = get_screen_names(data, 'mentioned', df, db, apis[0])
        data = get_screen_names(data, 'most_connected', df, db, apis[0])
    finally:
        # Close the database connection. It is no longer needed.
        conn.close()

    # Create a networkx graph where each node represents a community. Edges
    # represent membership into larger communities at the next level up
    # (down?) the dendrogram and have no edge weights. The data obtained in
    # the previous steps from community_analytics is loaded into the
    # attributes of each node.
    community_graph = create_community_graph(data, dendrogram)

    # Parse this graph into a json representation for use & consumption by
    # d3.js
    community_json = create_community_json(community_graph, user_info)

    # Just in case we don't have the screen name, grab it.
    if screen_name is None:
        screen_name = user_info['screen_name']

    # Pickle the objects for reuse.
    ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
    sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
    sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

    debug_bundle = (raw_df, df, df_similarity, dendrogram, data,
                    community_graph, community_json)
    with open(sn_file_debug, 'wb') as pkl:
        pickle.dump(debug_bundle, pkl)
    with open(sn_file, 'wb') as pkl:
        pickle.dump(community_json, pkl)

    # If debug is true, return all of the precursor objects along with the
    # json
    if debug:
        return debug_bundle

    # Otherwise return the json object
    return community_json
return has_cycle parent = {} def dfs_2(g): for s in g.vertices.values(): if s not in parent or parent[s] is None: parent[s] = None s.predecessor = None dfs_visit(s, g) def dfs_visit(s, g): for v in s.edges: if v not in parent or parent[v] is None: parent[v] = s v.predecessor= s dfs_visit(v, g) if __name__ == '__main__': g = graph.make_graph([('y','z'), ('a','b'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('b', 'y'), ('c', 'a'), ('d', 'y'), ]) dfs(g.vertices['a']) g = graph.make_graph([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'a'), ('e', 'f'), ('f', 'g'), ('g', 'c')]) dfs_2(g) for vertex in g.vertices.items(): print(vertex) g = graph.make_graph([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'a'), ('e', 'f'), ('f', 'g'), ('g', 'c')]) print('has cycle: ' + str(cycle_detection(g)))
def get_text(save_dir, file_name, write_=False):
    """OCR the text regions of an image and pass the results to ``make_graph``.

    The image at ``save_dir + file_name`` is read, ``ignore_lines`` supplies
    the contours, and the contours are grouped by ``make_rows``,
    ``merge_boxes`` and ``segment_columns``.  Every merged box at least 10
    pixels high is outlined on the image, cropped with a 2 pixel margin and
    sent to Tesseract.  A box is recorded as a key node when one of its
    space-separated tokens matches an entry of the module-level ``keys``.
    The annotated image is written to ``save_dir + 'boxed_' + file_name``.

    Args:
        save_dir: directory prefix; it is concatenated directly with file
            names, so it must already end with a path separator.
        file_name: name of the image file inside ``save_dir``.
        write_: when true, append the recognized text to ``recognized.txt``
            in both the current directory and ``save_dir``.  Both files are
            opened (and therefore created) either way.
    """
    read_dir = save_dir + file_name
    # If Tesseract is not on PATH, set pytesseract.pytesseract.tesseract_cmd
    # to the location of the executable before calling this function.
    print(read_dir)

    # Read image from which text needs to be extracted
    img = cv2.imread(read_dir)
    # rect and img2 keep referring to the image as loaded; img is rebound
    # below to whatever ignore_lines returns.
    rect = img
    img2 = img

    # remove lines and form contours
    contours, hierarchy, img = ignore_lines(img, save_dir, file_name)

    croptime = 0
    tesstime = 0
    key_nodes = []
    text_val = {}
    node_number = 0

    # Both output files are opened in append mode.  The context manager
    # closes them; previously one was "closed" with ``file.close`` (no call)
    # and the other was never closed.
    with open("recognized.txt", "a") as out_cwd, \
            open(save_dir + "recognized.txt", "a") as out_save:
        # assigning rows to contours
        contoursBBS = make_rows(contours)
        # combining contours on the basis of the x / y thresholds
        merge_cnt = merge_boxes(rect, contoursBBS, thresh_x=1.0, thresh_y=0.6)
        column_contours = segment_columns(img2, img.shape, merge_cnt)
        print("recognizing text", flush=True)

        tt = time.time()
        for cnt in sorted(merge_cnt):
            for contour in merge_cnt[cnt]:
                # Node numbers count every box, including the skipped ones.
                node_number += 1
                [x, y, w, h] = contour
                if h < 10:
                    continue

                # Drawing a rectangle on copied image
                rect = cv2.rectangle(rect, (x, y), (x + w, y + h),
                                     (0, 255, 0), 1)

                # Cropping the text block for giving input to OCR
                start = time.time()
                cropped = img[max(0, y - 2):y + h + 2,
                              max(0, x - 2):x + w + 2]
                end = time.time()
                croptime += end - start

                # Apply OCR on the cropped image
                text = pytesseract.image_to_string(cropped, lang='eng',
                                                   config='--psm 6')

                # NOTE(review): nesting below is reconstructed from a
                # collapsed source line.  As written, an exact token match
                # stops the scan, while a substring/fuzzy match only stops
                # the scan over keys, so a later token can add the same node
                # again.  Confirm against the original layout.
                p = text.split(' ')
                for tex in p:
                    tex = tex.lower()
                    if (tex in keys):
                        rect = cv2.rectangle(rect, (x, y), (x + w, y + h),
                                             (0, 0, 255), 1)
                        key_nodes.append(node_number - 1)
                        break
                    else:
                        for k in keys:
                            if (k in tex):
                                rect = cv2.rectangle(rect, (x, y),
                                                     (x + w, y + h),
                                                     (0, 0, 255), 1)
                                key_nodes.append(node_number - 1)
                                break
                            if (len(text) >= 1 and
                                    levenshtein_ratio_and_distance(k, tex) > 0.8):
                                rect = cv2.rectangle(rect, (x, y),
                                                     (x + w, y + h),
                                                     (0, 0, 255), 1)
                                key_nodes.append(node_number - 1)
                                break

                text_val[node_number - 1] = text
                end2 = time.time()
                tesstime += end2 - end

                # Appending the text into file
                if text != "":
                    if write_:
                        out_cwd.write(text)
                        out_save.write(text)

            # NOTE(review): one newline per row of boxes; the collapsed
            # source does not show whether this was per row or per box.
            if write_:
                out_save.write("\n")
                out_cwd.write("\n")

        tt = time.time() - tt
        print("croptime: ", croptime, " tesstime: ", tesstime, " tt: ", tt)

    cv2.imwrite(save_dir + 'boxed_' + file_name, rect)
    make_graph(rect, merge_cnt, key_nodes, column_contours, text_val,
               synonyms, labels)
def test1():
    """Print KDE predictions for a known-good node and a known-bad node.

    Reads the module-level ``good``/``bad`` inputs and the
    ``pnode_*``/``version_*`` identifiers, builds the KDEs from the good
    graph, and prints, in order:

    * the KDE entries stored under the good node's name,
    * the predictions for the good node itself,
    * the per-key minimum prediction over every node sharing that name,
    * the predictions for the bad node scored with the good KDEs,
    * the per-key difference ``good - bad`` (``None`` when either is missing).

    Output is written with single-argument ``print(...)`` calls so the
    function produces the same text on Python 2 and Python 3.
    """
    def show(*parts):
        # Same text as the Python 2 statement ``print a, b, c``.
        print(" ".join(str(part) for part in parts))

    def less_than(a, b):
        # Python 2 ordered None before every other value; Python 3 raises
        # TypeError instead.  Keep the old ordering explicit.
        if a is None or b is None:
            return a is None and b is not None
        return a < b

    node_good = (pnode_good, version_good)
    node_bad = (pnode_bad, version_bad)

    dg_good, nodes_good = graph.make_graph(good)
    good_kdes = histogram.make_kdes(dg_good)
    good_vals = histogram.kde_predict_all(good_kdes, dg_good, node_good)
    name = histogram.get_name(dg_good, node_good)

    # extract all nodes with same name:
    proc_nodes = []
    proc_vals = {}
    for node_num, versions in nodes_good.items():
        for version in versions:
            test_node = (node_num, version)
            if histogram.get_name(dg_good, test_node) == name:
                proc_nodes.append(test_node)
                proc_vals[test_node] = histogram.kde_predict_all(
                    good_kdes, dg_good, test_node)

    for (k, v) in sorted(good_kdes[name].items()):
        show(k, v[1] if v else v)
    print("")
    print("")

    print(">>>>>GOOD ON GOOD")
    for (k, v) in sorted(good_vals.items()):
        show(k, v)

    # For every key, remember the smallest prediction and the node it came from.
    mins = {}
    min_nodes = {}
    print(">>>>>GOOD ON GOOD (on %d nodes)" % len(proc_nodes))
    for node, vals in proc_vals.items():
        for (k, v) in sorted(vals.items()):
            if k not in mins or less_than(v, mins[k]):
                mins[k] = v
                min_nodes[k] = node
    for (k, v) in sorted(mins.items()):
        show(k, v, min_nodes[k], histogram.get_vals(dg_good, min_nodes[k]))
    print("")

    print(">>>>>BAD ON GOOD")
    dg_bad, _ = graph.make_graph(bad)
    bad_on_good_vals = histogram.kde_predict_all(good_kdes, dg_bad, node_bad)
    for (k, v) in sorted(bad_on_good_vals.items()):
        show(k, v)
    print("")

    print(">>>>>COMPARISON")
    diffs = {}
    for k in good_vals:
        if good_vals[k] is not None and bad_on_good_vals[k] is not None:
            diffs[k] = good_vals[k] - bad_on_good_vals[k]
        else:
            diffs[k] = None
    for (k, v) in sorted(diffs.items()):
        show(k, v)
def test():
    """Run ``tarjans`` on two sample edge lists and print the components.

    For each sample the graph is built first, then the banner is printed,
    then the strongly connected components are computed and printed.
    """
    samples = (
        ('first test: ',
         [(1,2), (2,3), (3, 4), (3,5), (4,6), (5,7), (6,3), (7,6), (7,3)]),
        ('second test: ',
         [(1,2), (2,4), (4,3), (3,1), (5,3), (5,4), (5,6), (6,7), (7,5),
          (9,8), (8, 10), (10,9), (10,11), (11,10), (12,13), (11,13)]),
    )
    for banner, edge_list in samples:
        graph = graph_utils.make_graph(edge_list)
        print(banner)
        print_components(tarjans(graph))