import sys
import logging
from datetime import datetime
from functools import partial

# `ec` (catalyst extraction), `eu` (utilities), `initialize_logger`,
# `parse_options` and `calculate_network_metrics` are project-local
# names assumed to be imported or defined elsewhere in this module.


def main():
    initialize_logger('./log')

    generated = datetime.now()

    (kind, source, destination_path, moderator,
     timestep_size, timestep_window, timestep_count,
     create_datapackage, license_type, license_url,
     datapackage_title) = parse_options(sys.argv[1:])

    logging.info("Parsing catalyst - Started")
    logging.info("Parsing catalyst - Source file: %(s)s" % {'s': source})
    logging.info("Parsing catalyst - Output directory: %(s)s" % {'s': destination_path})
    logging.info("Parsing catalyst - Extraction Kind: %(s)s" % {'s': kind})

    # 1. load and parse the JSON file into an RDF graph
    graph = ec.inference.catalyst_graph_for(source)

    # 2. extract the users, nodes and comments from the graph
    use_posts = (kind == 'posts') or (kind == 'both')
    use_ideas = (kind == 'ideas') or (kind == 'both')
    assert use_ideas or use_posts, "kind must be ideas, posts or both"

    moderator_test = None
    if moderator:
        moderator_test = partial(ec.extract.is_moderator, graph,
                                 moderator_roles=(moderator,))

    network = ec.extract.ideas.graph_to_network(generated, graph,
                                                use_ideas, use_posts,
                                                moderator_test)

    directed_multiedge_network = calculate_network_metrics(
        {}, {}, {}, network,
        timestep_size, timestep_window, timestep_count)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    logging.info("Parsing catalyst - Completed")
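# Why `partial` above: it freezes `graph` and the `moderator_roles` keyword,
# leaving a one-argument predicate the extractor can call per user. A minimal,
# self-contained sketch (`has_role` is a hypothetical stand-in for
# ec.extract.is_moderator, not the project's actual implementation):

from functools import partial

def has_role(graph, user, moderator_roles=()):
    # checks a user record against the bound set of moderator roles
    return user.get('role') in moderator_roles

is_mod = partial(has_role, {'dummy': 'graph'}, moderator_roles=('moderator',))
assert is_mod({'role': 'moderator'}) is True
assert is_mod({'role': 'member'}) is False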
import sys
import time
import logging
from datetime import datetime

# `eu`, `initialize_logger`, `parse_options`, `load_files`, `extract_edges`,
# `extract_multiauthor_post_edges` and `calculate_network_metrics` are
# project-local names assumed to be imported or defined elsewhere.


def main():
    initialize_logger('./albertoEdgesenseLog')

    generated = datetime.now()

    (users_resource, nodes_resource, comments_resource,
     node_title_field,
     timestep_size, timestep_window, timestep_count,
     username, password,
     extraction_method,
     admin_roles, exclude_isolated,
     dumpto,
     create_datapackage, datapackage_title,
     license_type, license_url,
     destination_path) = parse_options(sys.argv[1:])

    logging.info("Network processing - started")

    # Load the files
    allusers, allnodes, allcomments = load_files(users_resource,
                                                 nodes_resource,
                                                 comments_resource,
                                                 username, password,
                                                 extraction_method,
                                                 dumpto, generated)

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        allusers, allnodes, allcomments,
        node_title_field, admin_roles, exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard).
    # strftime("%s") is a non-portable glibc extension, so the epoch
    # seconds are computed via time.mktime instead (see sketch below).
    network['meta']['generated'] = int(time.mktime(generated.timetuple()))

    network['edges'] = extract_edges(nodes_map, comments_map)
    network['edges'] += extract_multiauthor_post_edges(nodes_map, posts_map)

    # filter out nodes that have not participated in the full conversation
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    directed_multiedge_network = calculate_network_metrics(
        nodes_map, posts_map, comments_map, network,
        timestep_size, timestep_window, timestep_count)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    logging.info("Completed")
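# Sketch of the timestamp portability note above: strftime("%s") happens to
# work on glibc-based Linux but is undefined elsewhere (e.g. Windows), while
# time.mktime interprets a naive local-time struct_time portably and
# calendar.timegm is its UTC counterpart.

import time
import calendar
from datetime import datetime

now = datetime.now()
local_epoch = int(time.mktime(now.timetuple()))                   # naive local time -> epoch
utc_epoch = int(calendar.timegm(datetime.utcnow().timetuple()))   # naive UTC time -> epoch
assert abs(local_epoch - utc_epoch) <= 1  # same instant via two conversion paths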
import os
import logging
from datetime import datetime
from functools import partial

from flask import request, jsonify

# `ec`, `eu`, `initialize_logger`, `calculate_network_metrics` and
# `InvalidUsage` are project-local names assumed to be defined elsewhere
# in this module.


def parse():
    # fixed extraction parameters for this endpoint
    node_title_field = 'uid'
    timestep_size = 60 * 60 * 24 * 7  # one week, in seconds
    timestep_window = 1
    timestep_count = 20
    username = None
    password = None
    extraction_method = 'nested'
    admin_roles = set()
    exclude_isolated = False
    create_datapackage = False
    license_type = None
    license_url = None
    datapackage_title = None
    kind = 'both'
    moderator = None

    generated = datetime.now()

    # dict.has_key() was removed in Python 3; .get() covers both cases
    source_json = request.form.get('source')
    if not source_json:
        raise InvalidUsage('Missing parameters', status_code=400)

    initialize_logger('./log')

    logging.info("parse_source - Started")
    logging.info("parse_source - Source: %(s)s" % {'s': source_json})
    logging.info("parse_source - Extraction Kind: %(s)s" % {'s': kind})

    # 1. load and parse the JSON file into an RDF graph
    graph = ec.inference.catalyst_graph_for(source_json)

    # 2. extract the users, nodes and comments from the graph
    use_posts = (kind == 'posts') or (kind == 'both')
    use_ideas = (kind == 'ideas') or (kind == 'both')
    assert use_ideas or use_posts, "kind must be ideas, posts or both"

    moderator_test = None
    if moderator:
        moderator_test = partial(ec.extract.is_moderator, graph,
                                 moderator_roles=(moderator,))

    network = ec.extract.ideas.graph_to_network(generated, graph,
                                                use_ideas, use_posts,
                                                moderator_test)

    directed_multiedge_network = calculate_network_metrics(
        {}, {}, {}, network,
        timestep_size, timestep_window, timestep_count)

    # the original snippet used destination_path without defining it;
    # it is derived here from the run tag (assumption: outputs are
    # written under a static JSON data root matching base_path)
    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    base_path = os.path.join("/json/data", tag)
    destination_path = os.path.join("static", "json", "data", tag)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    # return the result URL
    result_path = os.path.join(base_path, "network.min.json")
    logging.info("Completed: %(s)s" % {'s': result_path})

    return jsonify({'last': tag,
                    'base_path': base_path,
                    'metrics': 'network.min.json',
                    'gexf': 'network.gexf',
                    'datapackage': 'datapackage.json'})
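# A minimal sketch of how parse() could be exposed as an HTTP endpoint,
# following the InvalidUsage error-handler pattern from the Flask docs.
# The app object, route name and handler below are assumptions for
# illustration, not taken from the original source.

from flask import Flask, jsonify

app = Flask(__name__)

class InvalidUsage(Exception):
    status_code = 400

    def __init__(self, message, status_code=None):
        super(InvalidUsage, self).__init__(message)
        self.message = message
        if status_code is not None:
            self.status_code = status_code

@app.errorhandler(InvalidUsage)
def handle_invalid_usage(error):
    # turn a raised InvalidUsage into a JSON error response
    response = jsonify({'message': error.message})
    response.status_code = error.status_code
    return response

# e.g. app.add_url_rule('/parse', 'parse', parse, methods=['POST'])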
def main(): initialize_logger("./log") generated = datetime.now() users_resource, nodes_resource, comments_resource, node_title_field, timestep_size, timestep_window, timestep_count, username, password, extraction_method, admin_roles, exclude_isolated, dumpto, create_datapackage, datapackage_title, license_type, license_url, destination_path = parse_options( sys.argv[1:] ) logging.info("Network processing - started") # Load the files allusers, allnodes, allcomments = load_files( users_resource, nodes_resource, comments_resource, username, password, extraction_method, dumpto, generated ) # extract a normalized set of data nodes_map, posts_map, comments_map = eu.extract.normalized_data( allusers, allnodes, allcomments, node_title_field, admin_roles, exclude_isolated ) # this is the network object # going forward it should be read from a serialized format to handle caching network = {} # Add some file metadata network["meta"] = {} # Timestamp of the file generation (to show in the dashboard) network["meta"]["generated"] = int(time.mktime(generated.timetuple())) # Windows-compatible network["edges"] = extract_edges(nodes_map, comments_map) network["edges"] += extract_multiauthor_post_edges(nodes_map, posts_map) # filter out nodes that have not participated to the full:conversations inactive_nodes = [v for v in nodes_map.values() if not v["active"]] logging.info("inactive nodes: %(n)i" % {"n": len(inactive_nodes)}) network["nodes"] = [v for v in nodes_map.values() if v["active"]] directed_multiedge_network = calculate_network_metrics( nodes_map, posts_map, comments_map, network, timestep_size, timestep_window, timestep_count ) eu.resource.write_network( network, directed_multiedge_network, generated, create_datapackage, datapackage_title, license_type, license_url, destination_path, ) logging.info("Completed")