def __init__(self, filename, display_id=None, timestamp=None, parent=None, load=True):
    """Build a record wrapping a single dataset.

    :param filename: one of
        - a ``GeneralData`` instance: wrapped directly;
        - a ``str`` filename: a data object is created via
          ``GeneralData.create_data_object`` unless ``load`` is False;
        - another ``GeneralDataRecord``: acts as a copy constructor
          (display_id/filename/gd/parent copied from the source).
    :param display_id: identifier used by display code
        (##@@TODO: display_id may be obsolete).
    :param timestamp: forwarded to the base-class constructor.
    :param parent: parent reference, stored as-is.
    :param load: when a ``str`` filename is given, controls whether the
        data object is instantiated immediately (True) or left None.
    :raises TypeError: if ``filename`` is none of the accepted types.
    """
    super(GeneralDataRecord, self).__init__(timestamp)
    #print "RCR110:", type(filename), isinstance(filename, AstroData)
    if isinstance(filename, GeneralData):
        self.filename = filename.filename
        self.gd = filename
        self.parent = parent  #filename.filename
    elif isinstance(filename, str):
        self.filename = filename
        if load:
            self.gd = GeneralData.create_data_object(filename)
        else:
            self.gd = None
        self.parent = parent
    elif isinstance(filename, GeneralDataRecord):
        # copy constructor: display_id comes from the source record, so
        # return here before the assignment at the bottom overwrites it
        gdr = filename
        self.display_id = gdr.display_id
        self.filename = gdr.filename
        # BUGFIX: was "gdr.ad" -- GeneralDataRecord keeps its data object
        # in .gd (set above and in load()); .ad is never assigned, so the
        # old code raised AttributeError on this path
        self.gd = gdr.gd
        self.parent = gdr.parent
        return
    else:
        # BUGFIX: was `raise "BAD ARGUMENT"` -- string exceptions are
        # invalid (themselves a TypeError on Python 2.6+)
        raise TypeError("BAD ARGUMENT: unsupported filename type %s" % type(filename))
    ##@@TODO: display_id may be obsolete
    self.display_id = display_id
def adaptSetType(self, rc):
    """Re-type each input dataset according to its own recommendation.

    Every input is asked for a recommended implementation; when it
    supplies a (module, class) pair, the dataset is re-created with that
    hint and reported as output.  Inputs without a usable recommendation
    pass through untouched.  Yields the reduction context once.
    """
    for dataset in rc.get_inputs():
        recommendation = dataset.recommend_data_object()
        log.stdinfo("adaptSetType: recommended dataset object %s" % recommendation)
        hint = None
        dtype = None
        if recommendation:
            # take the first entry carrying a truthy (module, class) value
            for dtype in recommendation:
                hint = recommendation[dtype]
                if hint:
                    break
        if not hint:
            outcome = "..no recommendation.."
        else:
            module_name, class_name = hint
            outcome = "module=%s class=%s" % (module_name, class_name)
            try:
                replacement = GeneralData.create_data_object(dataset, hint=hint)
                replacement.add("types", dtype)
                rc.report_output(replacement)
            except:
                # keep the untouched input on the stream, then re-raise
                rc.report_output(dataset, stream="")
                raise
        log.info("(pSR18) %s-> %s" % (dataset.basename, outcome))
    yield rc
def adaptSetType(self, rc):
    """Replace each input dataset with the object type it recommends.

    For every input in the reduction context, ask the dataset for a
    recommended (module, class) implementation; when one is found,
    re-create the dataset via GeneralData.create_data_object with that
    hint and report it as output.  Inputs with no recommendation pass
    through unchanged.  Yields the reduction context once.

    NOTE(review): this method appears twice in the file with identical
    bodies; the later definition shadows the earlier one -- consider
    removing the duplicate.
    """
    for inp in rc.get_inputs():
        rec = inp.recommend_data_object()
        log.stdinfo("adaptSetType: recommended dataset object %s" % rec)
        mandc = None
        if rec:
            # we'll take the random first if there is more than one recommendation, atm
            typ = None
            for typ in rec:
                mandc = rec[typ]
                if mandc:
                    break
        if mandc:
            # mandc is a (module, class) pair
            mod, clas = mandc
            result = "module=%s class=%s" % (mod, clas)
            try:
                newset = GeneralData.create_data_object(inp, hint=mandc)
                # record the recommendation key as the new dataset's type
                newset.add("types", typ)
                rc.report_output(newset)
            except:
                # on failure keep the original input on the stream and re-raise
                rc.report_output(inp, stream="")
                raise
        else:
            result = "..no recommendation.."
        log.info("(pSR18) %s-> %s" % (inp.basename, result))
    yield rc
def store_datasets(dataset_names, remove_local = False, elements = None): datasetnames = dataset_names if len(args.datasets)>5: if not args.all: print tc.colored( "%d datasets, showing first 5, use --all to show all" % len(args.datasets), "red", "on_white") datasetnames = args.datasets[:5] for fname in datasetnames: print " DATASET: %s" % tc.colored(fname, attrs=["bold"]) setref = GeneralData.create_data_object(fname) if setref == None: continue setref.put("_data.warehouse.types", setref.get_types()) if elements: setref.put("_data.warehouse.elements", elements) # populate_region may rely on the elements if hasattr(setref, "populate_region"): setref.populate_region() pkg = package_class(setref=setref) setref.put("_data.warehouse.store_path", pkg.get_store_path(setref, elements = elements)) setref.put("_data.warehouse.store_dir", os.path.dirname(pkg.get_store_path(setref))) print " TYPES: %s" % tc.colored(", ".join( setref.get_types() ) , "blue", "on_white") print "STORE_KEY: %s" % pkg.get_store_path(setref) print "STORE_DIR: %s" % os.path.dirname(pkg.get_store_path(setref)) setref.do_write_header() if args.store or args.archive: pkg.transport_to_warehouse(remove_local = remove_local)
def highlight(self, rc):
    """Log the standout row for each establishment-size column.

    For each input frame: restrict to rows with more than 100
    establishments, locate (idxmax) the row holding the maximum value in
    every column of the start..end window, then log that row's MSA name
    (from the county reference table) and the matching NAICS reference
    entries.

    rc["start"] / rc["end"] select the column window; defaults are
    "p1_4" .. "p1000_4".  Yields the reduction context once.
    """
    import string   # NOTE(review): unused import
    pd.set_option("display.width", 120)
    pd.set_option("display.max_colwidth", 120)
    log.status("pC25:highlight")
    startcol = rc["start"] if rc["start"] else "p1_4"
    endcol = rc["end"] if rc["end"] else "p1000_4"
    # reference tables: county/MSA names and NAICS code descriptions
    counties = GeneralData.create_data_object("msa_county_reference12.h5")
    naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
    cdf = counties.dataframe
    ndf = naicsinfo.dataframe
    for inp in rc.get_inputs():
        df = inp.dataframe
        cmap = business_cols(df)
        # only "busy" rows: more than 100 establishments
        busy = df[df["est"] > 100]
        # per-column positional index of the maximum within the window
        maxind = busy.loc[:, startcol:endcol].idxmax()
        log.status("numrows=%d" % len(inp.dataframe))
        #log.status("maxind=\n%s" % maxind)
        for (key, val) in maxind.iteritems():
            log.status("====\nmax %s companies of this size = %s %s%%" % (key, df[cmap[key]].iloc[val], df[key].iloc[val] * 100))
            msa = df["msa"].iloc[val]
            log.status("msa = %s %s " % (msa, COLORSTR(cdf[cdf["msa"] == msa].iloc[0]["name_msa"], attrs=["bold"])))
            # NAICS strings here use "/" as a trailing wildcard digit:
            # replacing "/" with "0" gives the low end and 10**count the
            # span, e.g. "51//" covers [5100, 5200) -- presumably; confirm
            naicsstr = df["naics"].iloc[val]
            cnt = naicsstr.count("/")
            order = pow(10, cnt)
            naics = naicsstr.replace("/", "0")
            try:
                naics = int(naics)
                # reference rows whose code falls in [naics, naics+order)
                nline = ndf[ndf.iloc[:, 0] >= naics][ndf.iloc[:, 0] < naics + order]
            except:
                # non-numeric code: report the failure inline
                nline = "couldn't find"
                pass
            log.status("naics = %s\n%s" % (naicsstr, nline))
    yield rc
def highlight(self, rc):
    """For every establishment-size column in the selected window, log
    the row holding the maximum value (among rows with est > 100),
    together with its MSA name and the matching NAICS reference rows.

    Column window comes from rc["start"]/rc["end"], defaulting to
    "p1_4".."p1000_4".  Yields the reduction context once.
    """
    import string
    pd.set_option("display.width", 120)
    pd.set_option("display.max_colwidth", 120)
    log.status("pC25:highlight")
    first_col = rc["start"] if rc["start"] else "p1_4"
    last_col = rc["end"] if rc["end"] else "p1000_4"
    # load the two reference tables
    county_data = GeneralData.create_data_object("msa_county_reference12.h5")
    naics_data = GeneralData.create_data_object("6-digit_2012_Codes.h5")
    county_df = county_data.dataframe
    naics_df = naics_data.dataframe
    for inp in rc.get_inputs():
        frame = inp.dataframe
        col_map = business_cols(frame)
        # keep only rows with more than 100 establishments
        active = frame[frame["est"] > 100]
        peak_rows = active.loc[:, first_col:last_col].idxmax()
        log.status("numrows=%d" % len(inp.dataframe))
        for size_col, row_idx in peak_rows.iteritems():
            log.status("====\nmax %s companies of this size = %s %s%%" % (size_col, frame[col_map[size_col]].iloc[row_idx], frame[size_col].iloc[row_idx] * 100))
            msa = frame["msa"].iloc[row_idx]
            log.status("msa = %s %s " % (msa, COLORSTR(county_df[county_df["msa"] == msa].iloc[0]["name_msa"], attrs=["bold"])))
            # "/" acts as a wildcard digit: "0"-fill for the low bound,
            # 10**count for the span
            naics_text = frame["naics"].iloc[row_idx]
            slash_count = naics_text.count("/")
            span = pow(10, slash_count)
            code = naics_text.replace("/", "0")
            try:
                code = int(code)
                match = naics_df[naics_df.iloc[:, 0] >= code][naics_df.iloc[:, 0] < code + span]
            except:
                match = "couldn't find"
            log.status("naics = %s\n%s" % (naics_text, match))
    yield rc
def naics_interpret(self, rc):
    """Prepend an 'industry' description column resolved from NAICS codes.

    For each input frame: move the index into a "naics" column, add an
    "industry" column (placed first), and fill it row by row with the
    description found in the 6-digit NAICS reference table.  The frame
    is written back to the input and reported as output.

    :param rc: reduction context providing get_inputs()/report_output()
    :yields: rc once
    :raises IndexError: if a code has no entry in the reference table
        (unchanged behaviour -- the lookup takes the first match).
    """
    pd.set_option("display.width", 120)
    pd.set_option("display.max_colwidth", 120)
    # NOTE: the county table (cdf) is loaded but unused; kept to avoid
    # changing side effects (the file is still opened as before)
    counties = GeneralData.create_data_object("msa_county_reference12.h5")
    naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
    cdf = counties.dataframe
    ndf = naicsinfo.dataframe
    for inp in rc.get_inputs():
        df = inp.dataframe
        # move the index into a column and name it "naics"
        df = df.reset_index()
        cols = list(df.columns)
        cols[0] = "naics"
        df.columns = cols
        # new column, initially a copy of the codes, moved to the front
        df["industry"] = df["naics"]
        ncols = list(df.columns.values)
        df = df[[ncols[-1]] + ncols[:-1]]
        for i in range(len(df)):
            naics = int(df.iloc[i]["naics"])
            # first matching row of the reference table, description column
            industry = ndf[ndf.iloc[:, 0] == naics].iloc[0, 1]
            # BUGFIX: was df["industry"].iloc[i] = ... -- chained
            # assignment may write to a temporary copy (SettingWithCopy);
            # after reset_index the index is 0..n-1, so .loc writes the
            # intended cell in place
            df.loc[i, "industry"] = industry
            #log.status("naics = %s" % naics)
            #log.status("industry = %s" % industry)
            #log.status("%s" % df.iloc[i])
        inp.dataframe = df
        rc.report_output(inp)
    yield rc
def naics_interpret(self, rc):
    """Prepend an 'industry' description column resolved from NAICS codes.

    For each input frame: move the index into a "naics" column, add an
    "industry" column placed first, and fill it row by row from the
    6-digit NAICS reference table; the frame is written back to the
    input and reported as output.  Yields the reduction context once.

    NOTE(review): this method appears twice in the file with identical
    bodies; the later definition shadows the earlier one.
    """
    pd.set_option("display.width", 120)
    pd.set_option("display.max_colwidth", 120)
    # reference tables; cdf is loaded but never used below
    counties = GeneralData.create_data_object("msa_county_reference12.h5")
    naicsinfo = GeneralData.create_data_object("6-digit_2012_Codes.h5")
    cdf = counties.dataframe
    ndf = naicsinfo.dataframe
    for inp in rc.get_inputs():
        df = inp.dataframe
        # move the index into a column and rename it "naics"
        df = df.reset_index()
        cols = list(df.columns)
        cols[0] = "naics"
        df.columns = cols
        # new column, initially a copy of the codes, then moved to the front
        df["industry"] = df["naics"]
        ncols = list(df.columns.values)
        cols = [ncols[-1]]
        cols.extend(ncols[:-1])
        df = df[cols]
        for i in range(len(df)):
            naics = df.iloc[i]["naics"]
            naics = int(naics)
            # first matching reference row; IndexError if the code is unknown
            industry = ndf[ndf.iloc[:, 0] == naics].iloc[0, 1]
            # NOTE(review): chained assignment -- may hit
            # SettingWithCopy; consider df.loc[i, "industry"] = industry
            df["industry"].iloc[i] = industry
            cols = list(df.columns)   # NOTE(review): dead store
            #log.status("naics = %s" % naics)
            #log.status("industry = %s" % industry)
            #log.status("%s" % df.iloc[i])
        inp.dataframe = df
        rc.report_output(inp)
    yield rc
def load(self):
    """(Re)create the wrapped data object from self.filename.

    Returns self so calls can be chained.
    """
    data_object = GeneralData.create_data_object(self.filename)
    self.gd = data_object
    return self
def on_message(self, message):
    """Dispatch one incoming websocket message (a JSON-encoded command).

    Handled commands:
      - "depot_msg"/"display":    render a quick-view image of a dataset
        (cached as <filename>.<imext>) and send it back base64-encoded;
      - "depot_msg"/"local_data": describe the datasets in the current
        directory (non-recursive) and send the listing;
      - "run_recipe":             spawn a "kit" subprocess built from the
        message options and stream its stderr back over the socket.

    NOTE(review): formatting reconstructed from a collapsed source --
    confirm block nesting against version control.
    """
    print "message:", type(message), message
    msg = json.loads(message)
    cmd = msg["cmd"]
    if cmd == "depot_msg":
        print "handle depot_msg"
        subcmd = msg["subcmd"] if "subcmd" in msg else None
        ##### DISPLAY
        if subcmd == "display":
            # client asks for a rendered quick-view of one dataset
            print "client wants a %s" % IMEXT
            fn = msg["options"]["args"][0]
            imgname = "%s.%s" % (fn, imext)
            numnz = -1
            if not os.path.exists(imgname):
                # image not cached yet: build it now
                stats = os.stat(fn)
                imsize = stats.st_size
                a = GeneralData.create_data_object(fn)
                # for large files, keep the client informed of progress
                if imsize > 100000:
                    progct = {"cmd": "nrm_depot", "subcmd": "display_status", "status_msg": "creating quick view on server"}
                    pmsg = json.dumps(progct)
                    self.ws.send(pmsg)
                nd = a.get_nd(1)
                # 3+ raster bands: build a 3-channel uint8 composite
                if a.data.RasterCount >= 3:
                    cd = np.zeros((nd.shape[0], nd.shape[1], 3), dtype=np.uint8)
                    cd[:, :, 0] = nd[:]
                    for i in range(1, a.data.RasterCount - 1):
                        # NOTE(review): band index is i+i (not i+1) -- verify intended
                        xd = a.get_nd(i + i)
                        cd[:, :, i] = xd[:, :]
                    nd = cd
                if imsize > 100000:
                    progct = {"cmd": "nrm_depot", "subcmd": "display_status", "status_msg": "produce %s" % imext}
                    pmsg = json.dumps(progct)
                    self.ws.send(pmsg)
                # crude downsample for tall images
                if nd.shape[0] > 1000:
                    bd = nd[::3, ::3]
                else:
                    bd = nd
                a = imshow(bd, interpolation="none", extent=[0, nd.shape[1], nd.shape[0], 0])
                if imsize > 100000:
                    progct = {"cmd": "nrm_depot", "subcmd": "display_status", "status_msg": "transfering"}
                    pmsg = json.dumps(progct)
                    self.ws.send(pmsg)
                savefig(imgname, bbox_inches='tight', dpi=32)
            # send the (possibly cached) image back as a data: URL
            # NOTE(review): file handle is never closed, and `done` below
            # is a dead store
            image = open(imgname)
            done = False
            imdata = image.read()
            datastr = b64encode(imdata)
            cmdct = {"num_nonzero": numnz, "cmd": "nrm_depot", "subcmd": "display", "answering": msg, "data64": "data:image/%s;base64,%s" % (imext, datastr)}
            msg = json.dumps(cmdct)
            self.ws.send(msg)
            return
        elif subcmd == "local_data":
            print "client wants local_data description"
            ldata = {}
            # only the first os.walk entry is used (see break below)
            for root, dirs, files in os.walk("."):
                ldata["root"] = root
                ldata["dirs"] = dirs
                ldata["files"] = files
                datasets = []
                datasets_ct = {}
                setrefs = []
                ldata["datasets"] = datasets
                print "sous45:", dw_info
                ldata["datawarehouse"] = dw_info
                for fil in files:
                    ext = os.path.splitext(fil)[1]
                    #print "sous37: ext", ext
                    ext_type = None
                    if len(ext):
                        ext = ext[1:]
                        # setref pairing
                        # put setrefs in secondary list to check later
                        if ext == "setref":
                            setrefs.append(fil)
                        elif ext in generaldata._data_object_classes:
                            ext_type = ".".join(generaldata._data_object_classes[ext])
                    # every file gets a listing entry; ext_type stays None
                    # for unrecognized extensions
                    ds = {"filename": fil, "ext_type": ext_type}
                    imgname = "%s.%s" % (fil, imext)
                    if os.path.exists(imgname):
                        ds["img_exists"] = True
                    else:
                        ds["img_exists"] = False
                    datasets.append(ds)
                    # @@ISSUE?: fil cannot already be in dict right?
                    datasets_ct[fil] = ds
                ## don't recurse into subdirectores
                break
            #setrefs
            # pair each "<name>.setref" file with the entry for "<name>"
            for fil in setrefs:
                rawname = fil[:-7]
                if rawname in datasets_ct:
                    datasets_ct[rawname]["has_setref"] = True
                    datasets_ct[rawname]["setref_name"] = fil
                    srfile = open(fil)
                    srstr = srfile.read()
                    srfile.close()
                    setrefct = json.loads(srstr)
                    datasets_ct[rawname]["setref"] = setrefct
            ldata["cmd"] = "nrm_depot"
            ldata["subcmd"] = "local_data"
            mtxt = json.dumps(ldata)
            #print "sous37:", mtxt
            self.ws.send(mtxt)
            return
    elif cmd == "run_recipe":
        # build commands
        # NOTE(review): option values are interpolated into a shell=True
        # command line below -- shell-injection risk if the client is
        # untrusted; consider Popen with an argument list
        cmdargs = ["kit"]
        opts = msg["options"]
        positional = opts["args"]
        del opts["args"]
        for key in opts:
            # True-valued options are bare flags; others carry a value
            if opts[key] != True:
                if len(key) == 1:
                    opt = "-%s '%s'" % (key, opts[key])
                else:
                    opt = "--%s '%s'" % (key, opts[key])
            else:
                if len(key) == 1:
                    opt = "-%s" % key
                else:
                    opt = "--%s" % key
            cmdargs.append(opt)
        cmdargs.extend(positional)
        args = cmdargs
        cmdline = " ".join(cmdargs)
        print "sous35: args:", args
        proc = Popen(cmdline, shell=True, stdout=PIPE, stderr=PIPE)
        print "process = ", proc
        self._ra_proc = proc
        self._ra_stdin = proc.stdin
        self._ra_stdout = proc.stdout
        self._ra_stderr = proc.stderr
        done = False
        i = 0
        # pump the child's stderr back over the websocket until it exits
        # (stdout reading is currently disabled -- buf stays empty)
        while not done:
            #sleep(.1)
            proc.stdout.flush()
            #print "reading stdout"
            #buf = proc.stdout.readline().strip()
            #buf = proc.stdout.read(10)
            buf = ""
            #print "reading stderr"
            errbuf = proc.stderr.readline().strip()
            #errbuf = proc.stderr.read(10)
            #
            #print "sous: |%s|%s|%s "%( buf,errbuf, proc.poll())
            cmdct = {"cmd": "nrm_log", "stdio": buf, "stderr": errbuf, "ansi": "%s%s" % (buf, errbuf)}
            msg = json.dumps(cmdct)
            #self.ws.send(buf)
            #self.ws.send(errbuf)
            self.ws.send(msg)
            #print "sous:poll"
            if proc.poll() != None:
                done = True
            i += 1
        print "END CONNECTION"
        return
# elements are what get's printed into the shelf/format strings elements = {} try: packager_key = int(packager) package_class_struct = package_class_list[packager_key] except: for pckr in package_class_list: if packager == pckr.keys()[0]: package_class_struct = pckr print "packager = %s" % package_class_struct.keys()[0] package_type = None package_class = None if args.fileinfo: for fil in args.dataset: gd = GeneralData.create_data_object(fil) print "File: %s" % gd.basename print " %s" % ", ".join(gd.get_types()) # some flags imply others if args.store or args.archive: args.all = True remove_local = False if args.archive: remove_local = True if args.store: remove_local = False if args.remove_local != None: remove_local = args.remove_local