def output_kafka(graph_db, registry, kafka_url=None):
    """Publish every graph node and rel as a message on the "cs" Kafka topic.

    Args:
        graph_db: graph accessor passed through to out.output_json().
        registry: config store; the kafka URL is read from it when not given,
            and written back to it when it is given.
        kafka_url: optional "host:port" broker address. Defaults to the
            registry value (falling back to "localhost:9092").

    Returns:
        None. Side effect only: messages sent to Kafka.
    """
    ldict = {"step": MODULEFILE + "/" + inspect.stack()[0][3],
             "hostname": platform.node().split(".")[0]}
    l = logging.LoggerAdapter(common.fetch_lg(), ldict)
    kafka_topic = "cs"
    if kafka_url is None:
        kafka_url = registry.get_config("kafka_url", "localhost:9092")
    else:
        l.info("Updating registry with kafka_url: {}".format(kafka_url))
        registry.put_config("kafka_url", kafka_url)
    # Reuse the JSON exporter to get the serialized nodes/rels as lists.
    (nodes, rels) = out.output_json(graph_db, None, None, as_list=True)
    l.info("Connecting to kafka_url {}".format(kafka_url))
    kafka = KafkaClient(kafka_url)
    # try/finally so the connection is closed even if a send fails
    # (the original leaked the client on any producer error).
    try:
        # To send messages asynchronously
        producer = SimpleProducer(kafka)
        l.info("Sending nodes to kafka {}/{}".format(kafka_url, kafka_topic))
        for n in nodes:
            producer.send_messages(kafka_topic, n)
        l.info("Sending rels to kafka {}/{}".format(kafka_url, kafka_topic))
        for n in rels:
            producer.send_messages(kafka_topic, n)
    finally:
        kafka.close()
def output_json(graph_db, out_file_nodes, out_file_rels, as_list=False):
    """Serialize all graph nodes and rels to JSON, one object per line.

    Args:
        graph_db: callable mapping a type name to a dict-like collection of
            graph objects (rels expose id/srcId/dstId/srcType/... accessors).
        out_file_nodes: path for the node JSON lines; None means /dev/stdout.
        out_file_rels: path for the rel JSON lines; None means /dev/stdout.
        as_list: when True, skip file writing and return the data instead.

    Returns:
        (nodes, rels) tuple of JSON-string lists when as_list is True,
        otherwise None (results are written to the two files).
    """
    ldict = {"step": MODULEFILE + "/" + inspect.stack()[0][3],
             "hostname": platform.node().split(".")[0]}
    l = logging.LoggerAdapter(common.fetch_lg(), ldict)
    # Default both outputs to stdout when no path was supplied.
    if out_file_nodes is None:
        out_file_nodes = "/dev/stdout"
    if out_file_rels is None:
        out_file_rels = "/dev/stdout"
    nod_obj = {}
    rel_obj = {}
    l.info("Iterating all nodes and rels")
    for r in constants.CONST_REL_TYPE:
        # .values() instead of .iteritems(): the key was unused, and
        # .values() works on both Python 2 and Python 3.
        for v in graph_db(r).values():
            rel_obj[v.id()] = v
            # Pull in both endpoints of the rel so the node set is complete.
            nod_obj[v.srcId()] = \
                graph_db(constants.getNodeType(v.srcType()))[v.srcHash()]
            nod_obj[v.dstId()] = \
                graph_db(constants.getNodeType(v.dstType()))[v.dstHash()]
    if as_list:
        return (_convert_node_json(nod_obj, l),
                _convert_rel_json(rel_obj, nod_obj, l))
    # Truthiness check retained: an explicit empty-string path still skips.
    if out_file_nodes:
        nodes_list = _convert_node_json(nod_obj, l)
        with open(out_file_nodes, 'w') as w:
            for n in nodes_list:
                w.write(n + "\n")
        l.info("wrote node list as json to {}".format(out_file_nodes))
    if out_file_rels:
        rels_list = _convert_rel_json(rel_obj, nod_obj, l)
        with open(out_file_rels, 'w') as w:
            for r in rels_list:
                w.write(r + "\n")
        l.info("wrote rel list as json to {}".format(out_file_rels))
type=str, default=None, help='store results in kafka, specify kafka router url') group_out.add_argument('--outputjson', metavar="FILE.json", type=str, default=None, nargs=2, help='store results in json files, specify two files.' + 'files. First file for nodes, second for rels') args = parser.parse_args() lg = common.setup_logger(level=args.verbose, logfile=None) hs = platform.node().split(".")[0] ldict = {"step": "main", "hostname": hs} l = logging.LoggerAdapter(common.fetch_lg(), ldict) if args.inputlog: if not os.path.exists(args.inputlog): l.error("input log file does not exist") sys.exit(1) g = Node.myGraph(cache=None, inputfile=args.inputlog, etcdhost=args.etcdhost, etcdport=args.etcdport) elif args.inputlocal: g = Node.myGraph(cache=None, etcdhost=args.etcdhost, etcdport=args.etcdport) else:
def output_neo4j(graph_db, registry, neo4j_url=None, debug=False):
    """Export the in-memory graph into a neo4j server via batched writes.

    graph_db -- callable mapping a type name to a dict-like of graph objects
    registry -- config store; the neo4j URL is read from it when not given,
                and written back to it when it is given
    neo4j_url -- optional explicit neo4j REST endpoint
    debug -- when True, log the raw results of the batch submissions
    """
    ldict = {"step": MODULEFILE + "/" + inspect.stack()[0][3],
             "hostname": platform.node().split(".")[0]}
    l = logging.LoggerAdapter(common.fetch_lg(), ldict)
    nod_obj = {}
    rel_obj = {}
    nod_neo = {}
    #rel_neo = {}
    # Collect every rel of every known type, plus both endpoint nodes each
    # rel references, keyed by their ids.
    for r in constants.CONST_REL_TYPE:
        for k, v in graph_db(r).iteritems():
            rel_obj[v.id()] = v
            nod_obj[v.srcId()] = \
                graph_db(constants.getNodeType(v.srcType()))[v.srcHash()]
            nod_obj[v.dstId()] = \
                graph_db(constants.getNodeType(v.dstType()))[v.dstHash()]
    if neo4j_url is None:
        neo4j_url = registry.get_config("neo4j_url",
                                        'http://localhost:7474/db/data')
    else:
        l.info("Updating registry with neo4j_url: {}".format(neo4j_url))
        registry.put_config("neo4j_url", neo4j_url)
    l.info("Connecting to neo4j url {}".format(neo4j_url))
    neo4j_db = neo4j.GraphDatabaseService(neo4j_url)
    batch = neo4j.WriteBatch(neo4j_db)
    # write all nodes
    for k, v in nod_obj.iteritems():
        batch.\
            get_or_create_indexed_node("vid", "vid", k, v.property())
    l.info("Submitting node data to neo4j")
    result = batch.submit()
    if debug:
        results = "\t\t"
        for i in result:
            results += "\n\t\t{}".format(i)
        l.info("Got all node: {}".format(results))
    # NOTE(review): this zip assumes batch.submit() returns exactly one
    # result per request, in submission order, and that keys() iterates in
    # the same order as the iteritems() used to build the batch (true for a
    # dict that is not mutated in between) -- confirm against py2neo docs.
    for k, v in zip(nod_obj.keys(), result):
        nod_neo[k] = v
    # add node labels and index it
    batch = neo4j.WriteBatch(neo4j_db)
    for k, v in nod_obj.iteritems():
        batch.set_labels(nod_neo[k], constants.getNodeType(v.type()))
        # NOTE(review): assumes every node property dict has a "name" key.
        batch.add_to_index(neo4j.Node, "Name", "name",
                           v.property()["name"],
                           nod_neo[k])
    l.info("Submitting node index/label data to neo4j")
    result = batch.submit()
    # add rel: create a path (src)-[rel-type]->(dst) for every collected rel
    batch = neo4j.WriteBatch(neo4j_db)
    for k, v in rel_obj.iteritems():
        batch.get_or_create_path(nod_neo[v.srcId()],
                                 constants.getRelType(v.type()),
                                 nod_neo[v.dstId()])
    l.info("Submitting rel data to neo4j")
    result = batch.submit()
    if debug:
        results = "\t\t"
        for i in result:
            results += "\n\t\t{}".format(i)
        # NOTE(review): message says "node" but these are rel results
        # (runtime string left unchanged).
        l.info("Got all node: {}".format(results))