示例#1
0
	def round_generate(self):
		"""
		Generate objects from the scraped data.

		Runs the four SampleGenerator stages in order: indexes, communities,
		tgraphs, ptables. Each stage saves its result through self.fp_o(). When
		self.fp_exists() reports that the saved result is already present, the
		stage is not run again and the result is loaded back through self.fp_i()
		and assigned onto the generator instead.

		The ptables stage saves two files and is only skipped when both of them
		exist; otherwise it is regenerated and both files are written again.

		If self.interact is set, an interactive interpreter is started at the end.
		It receives locals() of this method, so the local names below are part of
		what the user sees there; do not rename them casually.
		"""
		socgr = Graph.Read(self.fp_i("soc.graphml"))
		gumap = dict_load(self.fp_i("group-user.map"))

		pddb = self.db("prod-doc")
		dppb = self.db("doc-prod")
		dtdb = self.db("doc-tag")
		tcdb = self.db("tag-cluster")

		# only p_idx and p_tgr are opened with the user-supplied cache size
		phdb = self.db("p_idx", lrusize=self.cache)
		phsb = self.db("p_idx_s")
		pgdb = self.db("p_tgr", lrusize=self.cache)
		pgsb = self.db("p_tgr_s")

		FILE_IDX = "idx.graphml"
		FILE_CMM = "communities.map"
		FILE_TGR = "tgr.graphml"
		FILE_PTB = "ptb.graphml"
		FILE_PTB_U = "ptables.map"

		sg = SampleGenerator(socgr, gumap, pddb, dppb, dtdb, tcdb, phdb, phsb, pgdb, pgsb)

		# indexes
		if not self.fp_exists(FILE_IDX):
			sg.generateIndexes()
			sg.prodgr.write(self.fp_o(FILE_IDX))
		else:
			sg.prodgr = Graph.Read(self.fp_i(FILE_IDX))

		# communities
		if not self.fp_exists(FILE_CMM):
			sg.generateCommunities()
			# saved as {position: community} so the list order can be restored
			dict_save(dict(enumerate(sg.comm)), self.fp_o(FILE_CMM))
		else:
			# rebuild the list by sorting on the saved position
			# NOTE(review): this relies on dict_load returning the positions as
			# ints; string keys would sort "10" before "2" -- confirm
			sg.comm = [v for k, v in sorted(dict_load(self.fp_i(FILE_CMM)).iteritems())]

		# tgraphs
		if not self.fp_exists(FILE_TGR):
			sg.generateTGraphs()
			sg.sprdgr.write(self.fp_o(FILE_TGR))
		else:
			sg.sprdgr = Graph.Read(self.fp_i(FILE_TGR))

		# ptables
		# This stage saves two files. Both must be present before the saved
		# result is reused: if a previous run stopped after writing FILE_PTB but
		# before FILE_PTB_U, checking FILE_PTB alone would make every later run
		# fail while loading the missing map.
		if not (self.fp_exists(FILE_PTB) and self.fp_exists(FILE_PTB_U)):
			sg.generatePTables()
			sg.ptabgr.write(self.fp_o(FILE_PTB))
			dict_save(sg.ptbmap, self.fp_o(FILE_PTB_U))
		else:
			sg.ptabgr = Graph.Read(self.fp_i(FILE_PTB))
			sg.ptbmap = dict_load(self.fp_i(FILE_PTB_U))

		LOG.info("generation complete; don't forget to run `postgen -d %s`" % self.base)

		if self.interact: code.interact(banner=self.banner(locals()), local=locals())
示例#2
0
	def round_examine(self, *args):
		"""
		Examine objects through the python interactive interpreter.

		Loads the social graph, the group-user map, the databases and the three
		generated graphs, builds a SampleStats from them, and prints one report
		for every file named in args.

		args are file names; each one is joined onto self.dir_res, parsed with
		read_chapters() and converted with QueryReport.from_chapters().

		If self.interact is set, an interactive interpreter is started at the end.
		It receives locals() of this method, so every name bound below -- including
		the ones this method never uses itself -- is visible there.
		"""
		socgr = Graph.Read(self.fp_i("soc.graphml"))
		gumap = dict_load(self.fp_i("group-user.map"))

		pddb = self.db("prod-doc")
		dppb = self.db("doc-prod")
		dtdb = self.db("doc-tag")
		tddb = self.db("tag-doc")
		tcdb = self.db("tag-cluster")
		# NOTE(review): the object returned by fp_i() is read but never explicitly
		# closed here
		totalsize = int(self.fp_i("doc-tag.len").read())

		# not passed to SampleStats; presumably opened only so they are available
		# in the interactive session -- confirm
		phdb = self.db("p_idx")
		phsb = self.db("p_idx_s")
		pgdb = self.db("p_tgr")
		pgsb = self.db("p_tgr_s")

		ptabgr = Graph.Read(self.fp_i("ptb.graphml"))
		prodgr = Graph.Read(self.fp_i("idx.graphml"))
		sprdgr = Graph.Read(self.fp_i("tgr.graphml"))

		stats = SampleStats(pddb, dppb, dtdb, tddb, totalsize, ptabgr, prodgr, sprdgr)

		# one QueryReport per file name given on the command line
		reports = []
		for arg in args:
			with open(os.path.join(self.dir_res, arg)) as fp:
				chaps = read_chapters(fp)
				# converted while the file is still open, in case chaps is read lazily
				reports.append(QueryReport.from_chapters(chaps))

		stats.printReports(reports, pretty=self.pretty)

		if self.interact: code.interact(banner=self.banner(locals()), local=locals())
示例#3
0
	def round_writeall(self):
		"""
		Write objects from the generated data.

		Builds a SampleWriter from the "p_idx" and "p_tgr" databases and the number
		read from "doc-tag.len", then has it write the indexes under self.dir_idx
		and the tgraphs under self.dir_tgr.

		If self.interact is set, an interactive interpreter is started at the end.
		It receives locals() of this method.
		"""
		# socgr and gumap are not passed to SampleWriter; they are only reachable
		# afterwards through locals() in the interactive session
		socgr = Graph.Read(self.fp_i("soc.graphml"))
		gumap = dict_load(self.fp_i("group-user.map"))

		# NOTE(review): the object returned by fp_i() is read but never explicitly
		# closed here
		totalsize = int(self.fp_i("doc-tag.len").read())
		phdb = self.db("p_idx")
		pgdb = self.db("p_tgr")

		ss = SampleWriter(phdb, pgdb, totalsize)
		ss.writeIndexes(self.dir_idx)
		ss.writeTGraphs(self.dir_tgr)

		if self.interact: code.interact(banner=self.banner(locals()), local=locals())
示例#4
0
	def round_photo(self):
		"""
		Scrape photos of the collected producers.

		Loads the social graph and the group-user map, asks self.ff to commit the
		user photos and the group photos into the "prod-doc" database, prunes the
		producers, and then saves the graph and the map again under the same file
		names they were loaded from.

		If self.interact is set, an interactive interpreter is started at the end.
		It receives locals() of this method.
		"""
		socgr = Graph.Read(self.fp_i("soc.graphml"))
		gumap = dict_load(self.fp_i("group-user.map"))

		pddb = self.db("prod-doc")
		# users are identified by the "id" attribute of the graph's vertices
		self.ff.commitUserPhotos(socgr.vs["id"], pddb)
		self.ff.commitGroupPhotos(gumap, pddb)

		# presumably pruneProducers() modifies socgr and gumap in place, since both
		# are written back right after without being reassigned -- confirm
		self.ff.pruneProducers(socgr, gumap, pddb)
		socgr.write_graphml(self.fp_o("soc.graphml"))
		dict_save(gumap, self.fp_o("group-user.map"))

		if self.interact: code.interact(banner=self.banner(locals()), local=locals())
示例#5
0
	         'and the file must define keys "api_key" and "secret", and optionally "token".%s' % exstr)
	config.add_option("-i", "--interact", action="store_true", dest="interact",
	  help = "Go into interactive mode after performing a round, to examine the objects created")
	config.add_option("-c", "--cache", type="int", metavar="SIZE", default=0,
	  help = "Cache size for database objects (only sometimes used, eg. pgdb, phdb in round 'generate')")
	config.add_option("-v", type="int", metavar="LEVEL", default=100,
	  help = 'Verbosity level (1-50; 1 most verbose, 20 standard)')
	config.add_option("-p", "--pretty", action="store_true", dest="pretty",
	  help = "Pretty print (only for some outputs)")

	(opts, args) = config.parse_args()

	kwargs = opts.__dict__
	with open(opts.key) as fp:
		try:
			keys = dict_load(fp)
			for k in ["api_key", "secret"]:
				if k in keys:
					kwargs[k] = keys[k]
				else:
					raise ValueError('key "%s" not found' % k)
			for k in ["token"]:
				if k in keys:
					kwargs[k] = keys[k]
		except ValueError, e:
			print >>sys.stderr, "bad keyfile format in %s: %r" % (opts.key, e)
			sys.exit(1)

	del opts.key

	if len(args) < 1: