def round_generate(self):
    """
    Generate objects from the scraped data.

    Runs four stages in order: indexes, communities, tgraphs, ptables. Each
    stage is checkpointed on disk: when self.fp_exists() reports that the
    stage's output is already present, the stage is not re-run and its result
    is loaded back onto the SampleGenerator instead.

    The ptables stage produces two files (FILE_PTB and FILE_PTB_U). It is only
    skipped when BOTH are present; if either is missing the stage is re-run
    and both files are written again.

    NOTE: the names of the local variables are part of this method's
    behaviour. When self.interact is set, locals() is passed both to
    self.banner() and to the interactive interpreter, so renaming or removing
    a local changes what the interactive session can see.
    """
    # Inputs produced by earlier rounds.
    socgr = Graph.Read(self.fp_i("soc.graphml"))
    gumap = dict_load(self.fp_i("group-user.map"))

    pddb = self.db("prod-doc")
    dppb = self.db("doc-prod")
    dtdb = self.db("doc-tag")
    tcdb = self.db("tag-cluster")

    # Databases filled by this round; only p_idx and p_tgr are opened with
    # the cache size given by self.cache.
    phdb = self.db("p_idx", lrusize=self.cache)
    phsb = self.db("p_idx_s")
    pgdb = self.db("p_tgr", lrusize=self.cache)
    pgsb = self.db("p_tgr_s")

    # Checkpoint file names, one (or two, for ptables) per stage.
    FILE_IDX = "idx.graphml"
    FILE_CMM = "communities.map"
    FILE_TGR = "tgr.graphml"
    FILE_PTB = "ptb.graphml"
    FILE_PTB_U = "ptables.map"

    sg = SampleGenerator(socgr, gumap, pddb, dppb, dtdb, tcdb, phdb, phsb, pgdb, pgsb)

    # indexes
    if not self.fp_exists(FILE_IDX):
        sg.generateIndexes()
        sg.prodgr.write(self.fp_o(FILE_IDX))
    else:
        sg.prodgr = Graph.Read(self.fp_i(FILE_IDX))

    # communities
    if not self.fp_exists(FILE_CMM):
        sg.generateCommunities()
        # Saved as {position: community} so the list order can be restored.
        dict_save(dict(enumerate(sg.comm)), self.fp_o(FILE_CMM))
    else:
        # Rebuild the list in key order.
        # NOTE(review): this restores the original order only if dict_load
        # returns the keys as integers (string keys would sort "10" before
        # "2") -- confirm against dict_load.
        sg.comm = [v for k, v in sorted(dict_load(self.fp_i(FILE_CMM)).iteritems())]

    # tgraphs
    if not self.fp_exists(FILE_TGR):
        sg.generateTGraphs()
        sg.sprdgr.write(self.fp_o(FILE_TGR))
    else:
        sg.sprdgr = Graph.Read(self.fp_i(FILE_TGR))

    # ptables
    # Both output files must exist for the checkpoint to count. The graph is
    # written before the map, so a run that stops between the two writes
    # leaves only FILE_PTB behind; checking FILE_PTB alone would then take
    # the load branch and try to read a FILE_PTB_U that was never written.
    if not (self.fp_exists(FILE_PTB) and self.fp_exists(FILE_PTB_U)):
        sg.generatePTables()
        sg.ptabgr.write(self.fp_o(FILE_PTB))
        dict_save(sg.ptbmap, self.fp_o(FILE_PTB_U))
    else:
        sg.ptabgr = Graph.Read(self.fp_i(FILE_PTB))
        sg.ptbmap = dict_load(self.fp_i(FILE_PTB_U))

    LOG.info("generation complete; don't forget to run `postgen -d %s`" % self.base)

    if self.interact:
        code.interact(banner=self.banner(locals()), local=locals())
def round_examine(self, *args):
    """
    Examine objects through the python interactive interpreter.

    Loads the graphs, maps and databases used by the other rounds, builds a
    SampleStats over them, and prints one QueryReport per positional
    argument. Each argument is a file name that is joined onto self.dir_res
    and parsed with read_chapters().

    Several locals (socgr, gumap, tcdb, phdb, phsb, pgdb, pgsb) are not
    passed to SampleStats. They are loaded anyway because locals() is handed
    to the interactive interpreter when self.interact is set, so every local
    name here is visible in that session and must keep its name.
    """
    # --- scraped inputs ---------------------------------------------------
    socgr = Graph.Read(self.fp_i("soc.graphml"))
    gumap = dict_load(self.fp_i("group-user.map"))

    pddb = self.db("prod-doc")
    dppb = self.db("doc-prod")
    dtdb = self.db("doc-tag")
    tddb = self.db("tag-doc")
    tcdb = self.db("tag-cluster")
    # The .len file holds a single integer in text form.
    totalsize = int(self.fp_i("doc-tag.len").read())

    # --- generated objects ------------------------------------------------
    phdb = self.db("p_idx")
    phsb = self.db("p_idx_s")
    pgdb = self.db("p_tgr")
    pgsb = self.db("p_tgr_s")

    ptabgr = Graph.Read(self.fp_i("ptb.graphml"))
    prodgr = Graph.Read(self.fp_i("idx.graphml"))
    sprdgr = Graph.Read(self.fp_i("tgr.graphml"))

    stats = SampleStats(
        pddb, dppb, dtdb, tddb, totalsize, ptabgr, prodgr, sprdgr)

    # --- reports ----------------------------------------------------------
    reports = []
    for arg in args:
        with open(os.path.join(self.dir_res, arg)) as fp:
            chaps = read_chapters(fp)
            # Built while the file is still open, in case read_chapters()
            # reads lazily from fp.
            reports.append(QueryReport.from_chapters(chaps))
    stats.printReports(reports, pretty=self.pretty)

    if not self.interact:
        return
    code.interact(banner=self.banner(locals()), local=locals())
def round_writeall(self):
    """
    Write objects from the generated data.

    Builds a SampleWriter from the "p_idx" and "p_tgr" databases plus the
    integer stored in "doc-tag.len", then writes the indexes into
    self.dir_idx and the tgraphs into self.dir_tgr.

    socgr and gumap are not given to the writer. They are loaded so that they
    are available in the interactive session, which receives locals() when
    self.interact is set; local names must therefore stay as they are.
    """
    # Only used from the interactive interpreter (see docstring).
    socgr = Graph.Read(self.fp_i("soc.graphml"))
    gumap = dict_load(self.fp_i("group-user.map"))

    # The .len file holds a single integer in text form.
    totalsize = int(self.fp_i("doc-tag.len").read())

    phdb = self.db("p_idx")
    pgdb = self.db("p_tgr")

    ss = SampleWriter(phdb, pgdb, totalsize)
    ss.writeIndexes(self.dir_idx)
    ss.writeTGraphs(self.dir_tgr)

    if not self.interact:
        return
    code.interact(banner=self.banner(locals()), local=locals())
def round_photo(self):
    """
    Scrape photos of the collected producers.

    Commits photos for the users in the social graph and for the groups in
    the group-user map into the "prod-doc" database, prunes producers via
    self.ff.pruneProducers(), and then writes the social graph and the
    group-user map back out under the same file names they were read from.

    Local names are exposed to the interactive interpreter through locals()
    when self.interact is set, so they must not be renamed.
    """
    socgr = Graph.Read(self.fp_i("soc.graphml"))
    gumap = dict_load(self.fp_i("group-user.map"))
    pddb = self.db("prod-doc")

    # Users are identified by the "id" attribute of the graph's vertices.
    self.ff.commitUserPhotos(socgr.vs["id"], pddb)
    self.ff.commitGroupPhotos(gumap, pddb)

    # NOTE(review): socgr and gumap are saved again right after this call,
    # so pruneProducers presumably modifies them in place -- confirm.
    self.ff.pruneProducers(socgr, gumap, pddb)

    socgr.write_graphml(self.fp_o("soc.graphml"))
    dict_save(gumap, self.fp_o("group-user.map"))

    if not self.interact:
        return
    code.interact(banner=self.banner(locals()), local=locals())
'and the file must define keys "api_key" and "secret", and optionally "token".%s' % exstr) config.add_option("-i", "--interact", action="store_true", dest="interact", help = "Go into interactive mode after performing a round, to examine the objects created") config.add_option("-c", "--cache", type="int", metavar="SIZE", default=0, help = "Cache size for database objects (only sometimes used, eg. pgdb, phdb in round 'generate')") config.add_option("-v", type="int", metavar="LEVEL", default=100, help = 'Verbosity level (1-50; 1 most verbose, 20 standard)') config.add_option("-p", "--pretty", action="store_true", dest="pretty", help = "Pretty print (only for some outputs)") (opts, args) = config.parse_args() kwargs = opts.__dict__ with open(opts.key) as fp: try: keys = dict_load(fp) for k in ["api_key", "secret"]: if k in keys: kwargs[k] = keys[k] else: raise ValueError('key "%s" not found' % k) for k in ["token"]: if k in keys: kwargs[k] = keys[k] except ValueError, e: print >>sys.stderr, "bad keyfile format in %s: %r" % (opts.key, e) sys.exit(1) del opts.key if len(args) < 1: