示例#1
0
def write_clusters(xmlcollection, clusters, base_clust_dir, type_='soft'):
    """
    Write every cluster into its own numbered directory on disk.

    Clusters are numbered from 1 in iteration order.  For cluster N the
    directory base_clust_dir + type_ + sep + str(N) + sep is created.
    It receives one file per clustered document (named after the
    document's id, holding its raw content) and a file "clust_stems"
    with one stem per line.  A progress line is printed per cluster.

    @param xmlcollection: collection of XML documents; must provide
                          get_doc(doc_id), whose result provides
                          get_id() and get_rawcontent().
    @param clusters: Iterable of (docs, clust_stems) pairs: the
                     clustered document indices and the stems used
                     for clustering them.
    @param base_clust_dir: Base directory for storing clusters.  It is
                           concatenated as is, so it has to end with a
                           path separator.
    @param type_: Default arg is 'soft' indicating our clusters
                  params reflects soft clusters. Other possible
                  argument: 'hard' for hard clustering.
    """
    clust_dir = base_clust_dir + type_ + sep

    # NOTE(review): open() receives get_def_enc() as its third positional
    # argument; presumably `open` is codecs.open in this module -- confirm
    # against the file's imports.
    for clust_no, (docs, clust_stems) in enumerate(clusters, 1):
        specific_clust_dir = clust_dir + str(clust_no) + sep
        try:
            makedirs(specific_clust_dir)
        except OSError as e:
            # Best effort: report the problem and still try to write the
            # files below.
            print(e)

        for doc_id in docs:
            xmldoc = xmlcollection.get_doc(doc_id)
            # `with` closes the handle even if writing fails.
            with open(specific_clust_dir + xmldoc.get_id(), "w",
                      get_def_enc()) as f:
                f.write(xmldoc.get_rawcontent())

        # Single pre-formatted string: identical console output to the
        # former comma-separated print statement (doubled blanks included).
        print("clust_dir:  %s clust_no:  %s  [written]"
              % (clust_dir, clust_no))

        with open(specific_clust_dir + "clust_stems", "w",
                  get_def_enc()) as f:
            for clust_stem in clust_stems:
                f.write(str(clust_stem) + "\n")  # stem should come here directly
示例#2
0
文件: d3_js.py 项目: 2mh/wahatttt
def export_d3_js(G,
                 files_dir=get_web_output_dir(),
                 graphname=get_def_graph_name(),
                 group=None,
                 width=get_webgraph_res()[0],
                 height=get_webgraph_res()[1],
                 node_labels=False,
                 encoding=get_def_enc()):
    """
    A function that exports a NetworkX graph as an interactive D3.js object.
    The function creates the output folder if needed, writes the graph's
    formatted JSON into it, and writes an HTML page to load the graph in a
    browser.

    Parameters
    ----------
    G : graph
        a NetworkX graph
    files_dir : string, optional
        name of directory to save files
    graphname : string, optional
        the name of the graph being saved as JSON, appears in the directory
        as 'graphname.json'; the HTML page is written as 'graphname.html'
    group : string, optional
        The name of the 'group' key for each node in the graph. This is used to
        assign nodes to exclusive partitions, and for node coloring if visualizing.
    width : int, optional
        width (px) of display frame for graph object in browser window
        (not used by this function's body)
    height : int, optional
        height (px) of display frame for graph object in browser window
        (not used by this function's body)
    node_labels : bool, optional
        If true, nodes are displayed with labels in browser
        (not used by this function's body)
    encoding: string, optional
        Specify which encoding to use when writing file.

    Examples
    --------
    >>> from scipy import random
    >>> from networkx.readwrite import d3_js
    >>> G = nx.random_lobster(20, .8, .8)
    >>> low = 0
    >>> high = 5
    >>> G.add_nodes_from(map(lambda i: (i, {'group': random.random_integers(low, high, 1)[0]}), G.nodes()))
    >>> G.add_edges_from(map(lambda e: (e[0], e[1], {'weight': random.random_integers(low+1, high, 1)[0]}), G.edges()))
    >>> d3_js.export_d3_js(G, files_dir="random_lobster", graphname="random_lobster_graph", node_labels=False)
    """
    if not os.path.exists(files_dir):
        os.makedirs(files_dir)

    # Begin by creating the necessary JS and HTML files
    write_d3_js(G, path=files_dir + "/" + graphname + ".json",
                group=group, encoding=encoding)

    # Resource references in the HTML template and what they are rewritten
    # to in the exported page.  str.replace() leaves a line untouched when
    # the reference is absent, so no prior find() check is needed (the old
    # `find(...) > 0` guard additionally missed a match at column 0).
    replacements = (
        ('"../../d3.js"', '"d3/d3.js"'),
        ('"../../d3.geom.js"', '"d3/d3.geom.js"'),
        ('"../../d3.layout.js"', '"d3/d3.layout.js"'),
        ('"force.css"', '"d3/force.css"'),
        ('"force.js"', '"' + graphname + '.js"'),
    )

    # `with` closes the page even if a write fails half-way.
    with open(files_dir + '/' + graphname + '.html', 'w') as graph_force_html:
        for line in d3_html.split("\n"):
            for old, new in replacements:
                line = line.replace(old, new)
            text = line + '\n'
            # Encode the whole line, not only the newline: the former
            # `line+'\n'.encode(encoding)` bound .encode to '\n' alone, so
            # `encoding` never reached the text.  Byte strings (`bytes` is
            # `str` under Python 2) are written unchanged, as before.
            if not isinstance(text, bytes):
                text = text.encode(encoding)
            graph_force_html.write(text)
示例#3
0
    argv.append(get_mailfolder() + "*")
# Well-formedness check: try to parse every file matched by each glob
# pattern in argv[1:].
for arg in argv[1:]:
    for filename in glob(arg):
        try:
            # Parsing without an exception means the document is
            # well-formed; nothing else to do for it.
            parseFile(filename)
        except Exception as e:
            # Any failure counts as "not well-formed": record the file
            # together with the error text and echo its name.
            add_invalid_docs(filename, str(e))
            print(filename)
        
# Open the output file that reports the invalid input XML files of the
# collection, and build an XML document whose root element is
# "invalid_xmldocsection".
# NOTE(review): open() receives get_def_enc() as its third positional
# argument; presumably `open` is codecs.open here -- confirm against the
# file's imports.
invalid_xml_filehandler = open(get_invalid_xml_filename(), "w", get_def_enc())
invalid_xmldoc = Doc()
invalid_xmldocs = invalid_xmldoc.createElement("invalid_xmldocsection")
invalid_xmldoc.appendChild(invalid_xmldocs)
# Counter with default value 0; presumably maps an error message to the
# number of times it occurred -- TODO confirm.
# NOTE(review): nothing in the visible code fills this mapping or adds
# children to invalid_xmldocs before they are used below, so as shown the
# loop prints nothing and the written document contains only the empty
# root element -- verify against the complete script.
invalidstat = defaultdict(int)

# Print one "<error> : <count>" line per entry, then serialize the XML
# document into the report file and close it.
for err, no in invalidstat.items():
    print err + " : " + str(no)
invalid_xml_filehandler.write(invalid_xmldoc.toprettyxml())
invalid_xml_filehandler.close()
if len(invalidstat.values()) == 0:
    print "No XML errors found in " + get_mailfolder()
else:
    print "XML file with detailed error info written to " \