Пример #1
0
def main():
    """Command-line entry point for the Catalyst extraction.

    Parses the options from ``sys.argv[1:]``, loads the source into a graph
    with ``ec.inference.catalyst_graph_for``, converts that graph into a
    network, computes the network metrics and writes the results through
    ``eu.resource.write_network``.

    Raises:
        AssertionError: if ``kind`` is none of 'ideas', 'posts' or 'both'.
    """
    initialize_logger('./log')

    generated = datetime.now()
    (kind, source, destination_path, moderator,
     timestep_size, timestep_window, timestep_count,
     create_datapackage, license_type, license_url,
     datapackage_title) = parse_options(sys.argv[1:])
    logging.info("Parsing catalyst - Started")
    logging.info("Parsing catalyst - Source file: %(s)s" % {'s': source})
    logging.info("Parsing catalyst - Output directory: %(s)s" % {'s': destination_path})
    logging.info("Parsing catalyst - Extraction Kind: %(s)s" % {'s': kind})

    # 1. load and parse the JSON file into a RDF Graph
    graph = ec.inference.catalyst_graph_for(source)

    # 2. extract the users, nodes and comments from the graph
    use_posts = kind in ('posts', 'both')
    use_ideas = kind in ('ideas', 'both')
    if not (use_ideas or use_posts):
        # Raised explicitly instead of through an `assert` statement: asserts
        # are removed when Python runs with -O, which would let an invalid
        # kind through. The exception type and message are kept unchanged.
        raise AssertionError("kind must be ideas, posts or both")

    moderator_test = None
    if moderator:
        # Only build the moderator predicate when a moderator role was given.
        moderator_test = partial(ec.extract.is_moderator, graph, moderator_roles=(moderator,))
    network = ec.extract.ideas.graph_to_network(generated, graph, use_ideas, use_posts, moderator_test)

    # Empty dicts fill the first three arguments: this flow builds no
    # separate maps, only the network extracted from the graph.
    directed_multiedge_network = calculate_network_metrics(
        {}, {}, {}, network, timestep_size, timestep_window, timestep_count)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    logging.info("Parsing catalyst - Completed")
def main():
    """Command-line entry point for the network extraction.

    Parses the options from ``sys.argv[1:]``, loads the users, nodes and
    comments resources, normalizes them, builds the ``network`` dictionary
    (metadata, edges and active nodes), computes the network metrics and
    writes the results through ``eu.resource.write_network``.
    """
    # Imported locally so this function does not depend on a module-level
    # `import time` being present.
    import time

    initialize_logger('./albertoEdgesenseLog')
    generated = datetime.now()

    (users_resource,
     nodes_resource,
     comments_resource,
     node_title_field,
     timestep_size,
     timestep_window,
     timestep_count,
     username,
     password,
     extraction_method,
     admin_roles,
     exclude_isolated,
     dumpto,
     create_datapackage,
     datapackage_title,
     license_type,
     license_url,
     destination_path) = parse_options(sys.argv[1:])

    logging.info("Network processing - started")

    # Load the files
    allusers, allnodes, allcomments = load_files(
        users_resource, nodes_resource, comments_resource,
        username, password, extraction_method, dumpto, generated)

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        allusers, allnodes, allcomments, node_title_field, admin_roles, exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard).
    # strftime("%s") is a platform-specific extension that is not available
    # everywhere (e.g. on Windows); time.mktime is the portable way to get
    # the epoch seconds of this naive local datetime.
    network['meta']['generated'] = int(time.mktime(generated.timetuple()))

    network['edges'] = extract_edges(nodes_map, comments_map)
    network['edges'] += extract_multiauthor_post_edges(nodes_map, posts_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    directed_multiedge_network = calculate_network_metrics(
        nodes_map, posts_map, comments_map, network,
        timestep_size, timestep_window, timestep_count)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    logging.info("Completed")  
Пример #3
0
def parse():
    """Build a network from the Catalyst source posted with the request.

    Reads the 'source' field of ``request.form``, loads it into a graph with
    ``ec.inference.catalyst_graph_for``, extracts the network, computes the
    network metrics and writes the results through
    ``eu.resource.write_network``.

    Returns:
        The ``jsonify`` response with the keys 'last', 'base_path',
        'metrics', 'gexf' and 'datapackage'.

    Raises:
        InvalidUsage: with status code 400 when the 'source' form field is
            missing or empty.
    """
    # Fixed settings used by this entry point.
    timestep_size = 60*60*24*7  # 604800; presumably one week in seconds
    timestep_window = 1
    timestep_count = 20
    create_datapackage = False
    license_type = None
    license_url = None
    datapackage_title = None
    kind = 'both'
    moderator = None
    generated = datetime.now()

    # `.get` replaces the Python-2-only `has_key` lookup; it yields None when
    # the field is absent, exactly like the previous conditional expression.
    source_json = request.form.get('source')
    if not source_json:
        raise InvalidUsage('Missing parameters', status_code=400)

    initialize_logger('./log')

    logging.info("parse_source - Started")
    logging.info("parse_source - Source: %(s)s" % {'s': source_json})
    logging.info("parse_source - Extraction Kind: %(s)s" % {'s': kind})

    # 1. load and parse the JSON file into a RDF Graph
    graph = ec.inference.catalyst_graph_for(source_json)

    # 2. extract the users, nodes and comments from the graph
    use_posts = (kind == 'posts') or (kind == 'both')
    use_ideas = (kind == 'ideas') or (kind == 'both')
    assert use_ideas or use_posts, "kind must be ideas, posts or both"
    moderator_test = None
    if moderator:
        moderator_test = partial(ec.extract.is_moderator, graph, moderator_roles=(moderator,))
    network = ec.extract.ideas.graph_to_network(generated, graph, use_ideas, use_posts, moderator_test)

    directed_multiedge_network = calculate_network_metrics(
        {}, {}, {}, network, timestep_size, timestep_window, timestep_count)

    # NOTE(review): `destination_path` is not assigned anywhere in this
    # function; presumably it is a module-level name - confirm it exists.
    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    # return the result URL
    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    base_path = os.path.join("/json/data", tag)
    result_path = os.path.join(base_path, "network.min.json")

    logging.info("Completed: %(s)s" % {'s': result_path})
    return jsonify({'last': tag, 'base_path': base_path, 'metrics': 'network.min.json', 'gexf': 'network.gexf', 'datapackage': 'datapackage.json' })
Пример #4
0
def main():
    """Run the network extraction pipeline from the command line.

    The options come from ``sys.argv[1:]``. The users, nodes and comments
    resources are loaded and normalized, the ``network`` dictionary (meta,
    edges, active nodes) is assembled, the network metrics are computed and
    the results are written through ``eu.resource.write_network``.
    """
    initialize_logger("./log")
    generated = datetime.now()

    (
        users_resource,
        nodes_resource,
        comments_resource,
        node_title_field,
        timestep_size,
        timestep_window,
        timestep_count,
        username,
        password,
        extraction_method,
        admin_roles,
        exclude_isolated,
        dumpto,
        create_datapackage,
        datapackage_title,
        license_type,
        license_url,
        destination_path,
    ) = parse_options(sys.argv[1:])

    logging.info("Network processing - started")

    # Fetch the raw users / nodes / comments.
    allusers, allnodes, allcomments = load_files(
        users_resource,
        nodes_resource,
        comments_resource,
        username,
        password,
        extraction_method,
        dumpto,
        generated,
    )

    # Bring the raw data into the normalized maps used below.
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        allusers,
        allnodes,
        allcomments,
        node_title_field,
        admin_roles,
        exclude_isolated,
    )

    # Generation timestamp (shown in the dashboard); Windows-compatible.
    generated_epoch = int(time.mktime(generated.timetuple()))

    # Comment edges first, then the multi-author post edges appended to them.
    edges = extract_edges(nodes_map, comments_map)
    edges += extract_multiauthor_post_edges(nodes_map, posts_map)

    # Keep only the nodes that took part in the conversations and count the
    # ones that are left out.
    active_nodes = []
    inactive_total = 0
    for node in nodes_map.values():
        if node["active"]:
            active_nodes.append(node)
        else:
            inactive_total += 1
    logging.info("inactive nodes: %(n)i" % {"n": inactive_total})

    # The network object; going forward it should be read from a serialized
    # format to handle caching.
    network = {
        "meta": {"generated": generated_epoch},
        "edges": edges,
        "nodes": active_nodes,
    }

    directed_multiedge_network = calculate_network_metrics(
        nodes_map,
        posts_map,
        comments_map,
        network,
        timestep_size,
        timestep_window,
        timestep_count,
    )

    eu.resource.write_network(
        network,
        directed_multiedge_network,
        generated,
        create_datapackage,
        datapackage_title,
        license_type,
        license_url,
        destination_path,
    )

    logging.info("Completed")
Пример #5
0
def main():
    """Command-line entry point for the Catalyst extraction.

    Parses the options from ``sys.argv[1:]``, loads the source into a graph
    with ``ec.inference.catalyst_graph_for``, converts that graph into a
    network, computes the network metrics and writes the results through
    ``eu.resource.write_network``.

    Raises:
        AssertionError: if ``kind`` is none of 'ideas', 'posts' or 'both'.
    """
    initialize_logger('./log')

    generated = datetime.now()
    (kind, source, destination_path, moderator,
     timestep_size, timestep_window, timestep_count,
     create_datapackage, license_type, license_url,
     datapackage_title) = parse_options(sys.argv[1:])
    logging.info("Parsing catalyst - Started")
    logging.info("Parsing catalyst - Source file: %(s)s" % {'s': source})
    logging.info("Parsing catalyst - Output directory: %(s)s" %
                 {'s': destination_path})
    logging.info("Parsing catalyst - Extraction Kind: %(s)s" % {'s': kind})

    # 1. load and parse the JSON file into a RDF Graph
    graph = ec.inference.catalyst_graph_for(source)

    # 2. extract the users, nodes and comments from the graph
    use_posts = kind in ('posts', 'both')
    use_ideas = kind in ('ideas', 'both')
    if not (use_ideas or use_posts):
        # Raised explicitly instead of through an `assert` statement: asserts
        # are removed when Python runs with -O, which would let an invalid
        # kind through. The exception type and message are kept unchanged.
        raise AssertionError("kind must be ideas, posts or both")

    moderator_test = None
    if moderator:
        # Only build the moderator predicate when a moderator role was given.
        moderator_test = partial(ec.extract.is_moderator,
                                 graph,
                                 moderator_roles=(moderator, ))
    network = ec.extract.ideas.graph_to_network(generated, graph, use_ideas,
                                                use_posts, moderator_test)

    # Empty dicts fill the first three arguments: this flow builds no
    # separate maps, only the network extracted from the graph.
    directed_multiedge_network = calculate_network_metrics({}, {}, {}, network,
                                                           timestep_size,
                                                           timestep_window,
                                                           timestep_count)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    logging.info("Parsing catalyst - Completed")
def main():
    """Command-line entry point for the network extraction.

    Parses the options from ``sys.argv[1:]``, loads the users, nodes and
    comments resources, normalizes them, builds the ``network`` dictionary
    (metadata, edges and active nodes), computes the network metrics and
    writes the results through ``eu.resource.write_network``.
    """
    # Imported locally so this function does not depend on a module-level
    # `import time` being present.
    import time

    initialize_logger('./albertoEdgesenseLog')
    generated = datetime.now()

    (users_resource,
     nodes_resource,
     comments_resource,
     node_title_field,
     timestep_size,
     timestep_window,
     timestep_count,
     username,
     password,
     extraction_method,
     admin_roles,
     exclude_isolated,
     dumpto,
     create_datapackage,
     datapackage_title,
     license_type,
     license_url,
     destination_path) = parse_options(sys.argv[1:])

    logging.info("Network processing - started")

    # Load the files
    allusers, allnodes, allcomments = load_files(users_resource,
                                                 nodes_resource,
                                                 comments_resource, username,
                                                 password, extraction_method,
                                                 dumpto, generated)

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        allusers, allnodes, allcomments, node_title_field, admin_roles,
        exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard).
    # strftime("%s") is a platform-specific extension that is not available
    # everywhere (e.g. on Windows); time.mktime is the portable way to get
    # the epoch seconds of this naive local datetime.
    network['meta']['generated'] = int(time.mktime(generated.timetuple()))

    network['edges'] = extract_edges(nodes_map, comments_map)
    network['edges'] += extract_multiauthor_post_edges(nodes_map, posts_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    directed_multiedge_network = calculate_network_metrics(
        nodes_map, posts_map, comments_map, network, timestep_size,
        timestep_window, timestep_count)

    eu.resource.write_network(network,
                              directed_multiedge_network,
                              generated,
                              create_datapackage,
                              datapackage_title,
                              license_type,
                              license_url,
                              destination_path)

    logging.info("Completed")