def warning_test():
    """For testing warning function."""

    # Should show warnings in order and only HAPIWarning {1,2} should
    # have a different format.
    from warnings import warn
    from hapiclient.util import warning

    warn('Normal warning 1')
    warn('Normal warning 2')
    warning('HAPI Warning 1')
    warning('HAPI Warning 2')
    warn('Normal warning 3')
    warn('Normal warning 4')


def setopts(defaults, given):
    """Override default keyword dictionary options.

    kwargs = setopts(defaults, kwargs)

    A warning is shown if `given` contains a key not found in `defaults`.
    """

    from inspect import stack
    fname = stack()[1][1]

    # Override defaults
    for key, value in given.items():
        if type(given[key]) == dict:
            setopts(defaults[key], given[key])
            continue
        if key in defaults:
            defaults[key] = value
        else:
            warning('Ignoring invalid keyword option "%s".' % key, fname)

    return defaults


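# Hedged usage sketch for setopts() (added example; the values are
# illustrative, not from the original source). Nested dictionaries are
# merged recursively and unknown keys produce a warning:
#
#   defaults = {'logging': False, 'tsopts': {'logy': False}}
#   given = {'tsopts': {'logy': True}, 'bogus': 1}  # 'bogus' is not a default
#   merged = setopts(defaults, given)
#   # merged['tsopts']['logy'] is True; 'bogus' was ignored with a warning.

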
def hapiplot(*args, **kwargs):
    """Plot response from HAPI server.

    Demos
    -----
    <https://github.com/hapi-server/client-python/blob/master/hapiclient/plot/hapiplot_test.py>

    Usage
    -----
    data, meta = hapiplot(server, dataset, params, start, stop, **kwargs)

    or

    meta = hapiplot(data, meta, **kwargs)

    where data and meta are return values from `hapi()`.

    All parameters are plotted. If a parameter has a bins attribute, it is
    plotted using `heatmap()`. Otherwise, it is plotted using `timeseries()`.

    Returns
    -------
    `data` is the same as that returned from `hapi()`.

    `meta` is the same as that returned from `hapi()` with the addition of

    meta['parameters'][i]['hapiplot']['figure'], a reference to the figure
    (e.g., plt.gcf()). Usage example:

        >>> fig = meta['parameters'][i]['hapiplot']['figure']
        >>> fig.set_facecolor('blue')
        >>> fig.axes[0].set_ylabel('new y-label')
        >>> fig.axes[0].set_title('new title\\nsubtitle\\nsubtitle')
        >>> fig.tight_layout()

    meta['parameters'][i]['hapiplot']['colorbar'], a reference to the
    colorbar on the figure (if the parameter was plotted as a heatmap).

    meta['parameters'][i]['hapiplot']['image'], which is PNG, PDF, or SVG
    data; included only if `returnimage=True`. Usage example:

        >>> img = meta['parameters'][i]['hapiplot']['image']
        >>> Image.open(io.BytesIO(img)).show()
        >>> # or
        >>> f = open('/tmp/a.png', 'wb')
        >>> f.write(img)
        >>> f.close()

    See Also
    --------
    hapi: Get data from a HAPI server
    timeseries: Used by `hapiplot()` to plot HAPI parameters with no `bins`
    heatmap: Used by `hapiplot()` to plot HAPI parameters with `bins`

    <https://github.com/hapi-server/client-python-notebooks>

    kwargs
    ------
    * logging: [False] Display console messages
    * usecache: [True] Use cached data
    * tsopts: {} kwargs for the `timeseries()` function
    * hmopts: {} kwargs for the `heatmap()` function

    Other kwargs
    ------------
    * returnimage: [False] If True, `hapiplot()` returns binary image data
    * returnformat: [png], svg, or pdf
    * cachedir: Directory to store images.
      Default is hapiclient.hapi.cachedir()
    * useimagecache: [True] Use cached image (when returnimage=True)
    * saveimage: [False] Save image to `cachedir`
    * saveformat: [png], svg, or pdf

    Example
    -------
        >>> server = 'http://hapi-server.org/servers/TestData/hapi'
        >>> dataset = 'dataset1'
        >>> start = '1970-01-01T00:00:00'
        >>> stop = '1970-01-02T00:00:00'
        >>> params = 'scalar,vector'
        >>> opts = {'logging': True}
        >>>
        >>> from hapiclient import hapiplot
        >>> hapiplot(server, dataset, params, start, stop, **opts)
        >>>
        >>> # or
        >>>
        >>> from hapiclient import hapi, hapiplot
        >>> data, meta = hapi(server, dataset, params, start, stop, **opts)
        >>> hapiplot(data, meta, **opts)
    """

    if len(args) == 5:
        # For consistency with gallery and autoplot functions, allow usage of
        # hapiplot(server, dataset, parameters, start, stop, **kwargs)
        from hapiclient.hapi import hapiopts
        from hapiclient.hapi import hapi

        kwargs_allowed = hapiopts()
        kwargs_reduced = {}

        # Extract hapi() options from kwargs
        for key, value in kwargs.items():
            if key in kwargs_allowed:
                kwargs_reduced[key] = value

        data, meta = hapi(args[0], args[1], args[2], args[3], args[4],
                          **kwargs_reduced)
        meta = hapiplot(data, meta, **kwargs)
        return data, meta
    else:
        data = args[0]
        meta = args[1]

    # Default options
    opts = {
        'logging': False,
        'saveimage': False,
        'returnimage': False,
        'usecache': True,
        'useimagecache': True,
        'cachedir': cachedir(),
        'backend': 'default',
        'style': 'fast',
        'title': '',
        'ztitle': '',
        'xlabel': '',
        'ylabel': '',
        'zlabel': '',
        'logx': False,
        'logy': False,
        'logz': False,
        'tsopts': {},
        'hmopts': {},
        'rcParams': {
            'savefig.dpi': 144,
            'savefig.format': 'png',
            'savefig.bbox': 'tight',
            'savefig.transparent': False,
            'figure.max_open_warning': 50,
            'figure.figsize': (7, 3),
            'figure.dpi': 144,
            'axes.titlesize': 10,
            "font.family": "serif",
            "font.serif": rcParams['font.serif'],
            "font.weight": "normal"
        },
        '_rcParams': {
            'figure.bbox': 'standard'
        }
    }

    # Override defaults
    opts = setopts(opts, kwargs)

    from hapiclient import __version__
    log('Running hapiplot.py version %s' % __version__, opts)

    # _rcParams are not actually rcParams:
    #   'figure.bbox': 'standard'
    # Set to 'tight' to have fig.tight_layout() called before figure is shown.

    if opts["saveimage"]:
        # Create cache directory
        dir = cachedir(opts['cachedir'], meta['x_server'])
        if not os.path.exists(dir):
            os.makedirs(dir)

    # Convert from NumPy array of byte literals to NumPy array of
    # datetime objects.
    timename = meta['parameters'][0]['name']
    Time = hapitime2datetime(data[timename])

    if len(meta["parameters"]) == 1:
        a = 0  # Time is the only parameter
    else:
        a = 1  # Time plus another parameter

    for i in range(a, len(meta["parameters"])):

        meta["parameters"][i]['hapiplot'] = {}

        name = meta["parameters"][i]["name"]

        # Return cached image (case where we are returning binary image data)
        # imagepath() options. Only need filename under these conditions.
        if opts['saveimage'] or (opts['returnimage']
                                 and opts['useimagecache']):
            # Will use given rc style parameters and style name to generate
            # the file name. Assumes rc parameters of style and hapiplot
            # defaults never change.
            styleParams = {}
            fmt = opts['rcParams']['savefig.format']
            if 'rcParams' in kwargs:
                styleParams = kwargs['rcParams']
                if 'savefig.format' in kwargs['rcParams']:
                    fmt = kwargs['rcParams']['savefig.format']

            fnameimg = imagepath(meta, i, opts['cachedir'], styleParams, fmt)

        if opts['useimagecache'] and opts['returnimage'] \
                and os.path.isfile(fnameimg):
            log('Returning cached binary image data in ' + fnameimg, opts)
            meta["parameters"][i]['hapiplot']['imagefile'] = fnameimg
            with open(fnameimg, "rb") as f:
                meta["parameters"][i]['hapiplot']['image'] = f.read()
            continue

        name = meta["parameters"][i]["name"]

        log("Plotting parameter '%s'" % name, opts)

        if len(data[name].shape) > 3:
            # TODO: Implement more than 2 dimensions?
            warning('Parameter ' + name
                    + ' has more than 2 dimensions. Not plotting.')
            continue

        # If parameter has a size with two elements, e.g., [N1, N2],
        # create N2 plots.
        if len(data[name].shape) == 3:  # shape = (Time, N1, N2)

            nplts = data[name].shape[1]
            if opts['returnimage']:
                warning('Only returning first image for parameter '
                        'with size[1] > 1.')
                nplts = 1

            for j in range(nplts):
                timename = meta['parameters'][0]['name']

                # Name to indicate what is plotted
                name_new = name + "[:," + str(j) + "]"

                # Reduced data N-D array
                datar = np.ndarray(shape=(data[name].shape[0]),
                                   dtype=[
                                       (timename, data.dtype[timename]),
                                       (name_new, data[name].dtype.str,
                                        data.dtype[name].shape[1])
                                   ])

                datar[timename] = data[timename]
                datar[name_new] = data[name][:, j]

                # Copy metadata to create a reduced metadata object
                metar = meta.copy()  # Shallow copy
                metar["parameters"] = []
                # Create parameters array with elements of Time parameter ...
                metar["parameters"].append(meta["parameters"][0])
                # ... and this parameter
                metar["parameters"].append(meta["parameters"][i].copy())

                # Give new name to indicate it is a subset of full parameter
                metar["parameters"][1]['name'] = name_new
                metar["parameters"][1]['name_orig'] = name
                # New size is N1
                metar["parameters"][1]['size'] = \
                    [meta["parameters"][i]['size'][1]]

                if 'units' in metar["parameters"][1]:
                    if type(meta["parameters"][i]['units']) == str \
                            or meta["parameters"][i]['units'] is None:
                        # Same units apply to all dimensions
                        metar["parameters"][1]["units"] = \
                            meta["parameters"][i]['units']
                    else:
                        metar["parameters"][1]["units"] = \
                            meta["parameters"][i]['units'][j]

                if 'label' in metar["parameters"][1]:
                    if type(meta["parameters"][i]['label']) == str:
                        # Same label applies to all dimensions
                        metar["parameters"][1]["label"] = \
                            meta["parameters"][i]['label']
                    else:
                        metar["parameters"][1]["label"] = \
                            meta["parameters"][i]['label'][j]

                # Extract bins corresponding to jth column of data[name]
                if 'bins' in metar["parameters"][1]:
                    metar["parameters"][1]['bins'] = []
                    metar["parameters"][1]['bins'].append(
                        meta["parameters"][i]['bins'][j])

                # rcParams is modified by setopts to have all rcParams.
                # Reset to original passed rcParams so that imagepath
                # computes file name based on rcParams passed to hapiplot.
                if 'rcParams' in kwargs:
                    opts['rcParams'] = kwargs['rcParams']

                metar = hapiplot(datar, metar, **opts)
                meta["parameters"][i]['hapiplot'] = \
                    metar["parameters"][i]['hapiplot']

            return meta

        if 'name_orig' in meta["parameters"][i]:
            title = meta["x_server"] + "\n" + meta["x_dataset"] \
                    + " | " + meta["parameters"][i]['name_orig']
        else:
            title = meta["x_server"] + "\n" + meta["x_dataset"] \
                    + " | " + name

        as_heatmap = False
        if 'size' in meta['parameters'][i] \
                and meta['parameters'][i]['size'][0] > 10:
            as_heatmap = True

        if 'bins' in meta['parameters'][i]:
            as_heatmap = True

        if 'units' in meta["parameters"][i] \
                and type(meta["parameters"][i]["units"]) == list:
            if as_heatmap:
                warning("Not plotting %s as heatmap because components "
                        "have different units."
                        % meta["parameters"][i]["name"])
            as_heatmap = False

        if as_heatmap:
            # Plot as heatmap

            hmopts = {
                'returnimage': opts['returnimage'],
                'transparent': opts['rcParams']['savefig.transparent']
            }

            if meta["parameters"][i]["type"] == "string":
                warning("Only types double, integer, and isotime are "
                        "implemented. Not plotting %s."
                        % meta["parameters"][i]["name"])
                continue

            z = np.asarray(data[name])

            if 'fill' in meta["parameters"][i] \
                    and meta["parameters"][i]['fill']:
                if meta["parameters"][i]["type"] == 'integer':
                    z = z.astype('<f8', copy=False)
                z = fill2nan(z, meta["parameters"][i]['fill'])

            if 'bins' in meta['parameters'][i]:
                ylabel = meta["parameters"][i]['bins'][0]["name"] + " [" \
                         + meta["parameters"][i]['bins'][0]["units"] + "]"
            else:
                ylabel = "col %d" % i

            units = meta["parameters"][i]["units"]
            nl = ""
            if len(name) + len(units) > 30:
                nl = "\n"
            zlabel = name + nl + " [" + units + "]"

            if 'bins' in meta['parameters'][i]:
                if 'ranges' in meta["parameters"][i]['bins'][0]:
                    bins = np.array(
                        meta["parameters"][i]['bins'][0]["ranges"])
                else:
                    bins = np.array(
                        meta["parameters"][i]['bins'][0]["centers"])
            else:
                bins = np.arange(meta['parameters'][i]['size'][0])

            dt = np.diff(Time)
            dtu = np.unique(dt)
            if len(dtu) > 1:
                # warning('Time values are not uniformly spaced. Bin width '
                #         'for time will be based on time separation of '
                #         'consecutive time values.')
                if False and 'cadence' in meta:
                    # Cadence != time bin width in general, so don't do this.
                    # See https://github.com/hapi-server/data-specification/issues/75
                    # Kept for future reference when Parameter.bin.window or
                    # Parameter.bin.windowWidth is added to spec.
                    import isodate
                    dt = isodate.parse_duration(meta['cadence'])

                if 'timeStampLocation' in meta:
                    if meta['timeStampLocation'].lower() == "begin":
                        Time = np.vstack((Time, Time + dt))
                    if meta['timeStampLocation'].lower() == "end":
                        Time = np.vstack((Time - dt, Time))
                    if meta['timeStampLocation'].lower() == "center":
                        Time = np.vstack((Time - dt / 2, Time + dt / 2))
                else:
                    # Default is center
                    Time = np.vstack((Time - dt / 2, Time + dt / 2))

                Time = np.transpose(Time)
            elif 'timeStampLocation' in meta:
                if meta['timeStampLocation'].lower() == "begin":
                    Time = np.append(Time, Time[-1] + dtu[0])
                if meta['timeStampLocation'].lower() == "end":
                    Time = Time - dtu[0]
                    Time = np.append(Time, Time[-1] + dtu[0])

            if opts['xlabel'] != '' and 'xlabel' not in opts['hmopts']:
                hmopts['xlabel'] = opts['xlabel']

            opts['hmopts']['ylabel'] = ylabel
            if opts['ylabel'] != '' and 'ylabel' not in opts['hmopts']:
                hmopts['ylabel'] = opts['ylabel']

            opts['hmopts']['title'] = title
            if opts['title'] != '' and 'title' not in opts['hmopts']:
                hmopts['title'] = opts['title']

            opts['hmopts']['zlabel'] = zlabel
            if opts['zlabel'] != '' and 'zlabel' not in opts['hmopts']:
                hmopts['zlabel'] = opts['zlabel']

            if False:
                opts['hmopts']['ztitle'] = ztitle
                if opts['ztitle'] != '' and 'ztitle' not in opts['hmopts']:
                    hmopts['ztitle'] = opts['ztitle']

            if opts['logx'] is not False:
                hmopts['logx'] = True
            if opts['logy'] is not False:
                hmopts['logy'] = True
            if opts['logz'] is not False:
                hmopts['logz'] = True

            for key, value in opts['hmopts'].items():
                hmopts[key] = value

            with rc_context(rc=opts['rcParams']):
                fig, cb = heatmap(Time, bins, np.transpose(z), **hmopts)

            meta["parameters"][i]['hapiplot']['figure'] = fig
            meta["parameters"][i]['hapiplot']['colorbar'] = cb

        else:

            tsopts = {
                'logging': opts['logging'],
                'returnimage': opts['returnimage'],
                'transparent': opts['rcParams']['savefig.transparent']
            }

            ptype = meta["parameters"][i]["type"]
            if ptype == "isotime":
                y = hapitime2datetime(data[name])
            elif ptype == 'string':
                y = data[name].astype('U')
            else:
                y = np.asarray(data[name])

            if 'fill' in meta["parameters"][i] \
                    and meta["parameters"][i]['fill']:
                if ptype == 'isotime' or ptype == 'string':
                    Igood = y != meta["parameters"][i]['fill']
                    # Note that the json reader returns fill as type U,
                    # not b.
                    # Igood is a boolean mask; count masked-out elements.
                    Nremoved = data[name].size - np.sum(Igood)
                    if Nremoved > 0:
                        # TODO: Implement masking so connected line plots
                        # will show gaps as they do for NaN values.
                        warning('Parameter ' + name + ' is of type ' + ptype
                                + ' and has ' + str(Nremoved)
                                + ' fill value(s). Masking is not '
                                'implemented, so removing fill elements '
                                'before plotting.')
                        Time = Time[Igood]
                        y = y[Igood]
                if ptype == 'integer':
                    y = y.astype('<f8', copy=False)
                if ptype == 'integer' or ptype == 'double':
                    y = fill2nan(y, meta["parameters"][i]['fill'])

            units = None
            if 'units' in meta["parameters"][i] \
                    and meta["parameters"][i]['units']:
                units = meta["parameters"][i]["units"]

            nl = ""
            if type(units) == str:
                if len(name) + len(units) > 30:
                    # TODO: Automatically figure out when this is needed.
                    nl = "\n"
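            # Illustration (added comment; names and values are
            # hypothetical): for name='proton_spectrum_uncertainty' and
            # units='particles/(s cm^2 sr MeV)', len(name) + len(units) > 30,
            # so nl='\n' and the units wrap onto a second line of the
            # y-label constructed below.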
            ylabel = name
            if units is not None and type(units) is not list:
                ylabel = name + nl + " [" + units + "]"
            if type(units) == list:
                ylabel = name

            if 'legendlabels' not in opts['tsopts']:
                legendlabels = []
                if 'size' in meta['parameters'][i]:
                    for l in range(0, meta['parameters'][i]['size'][0]):
                        bin_label = ''
                        bin_name = ''
                        col_name = ''
                        if 'bins' in meta['parameters'][i]:
                            # Shorthand for the first bins object.
                            bins0 = meta['parameters'][i]['bins'][0]
                            bin_name = bins0['name']
                            if 'label' in bins0:
                                if type(bins0['label']) == str:
                                    bin_name = bins0['label']
                                else:
                                    bin_name = bins0['label'][l]

                            sep = ''
                            if 'centers' in bins0 and 'ranges' in bins0:
                                bin_name = bin_name + ' bin with'
                                sep = ';'

                            bin_label = ''
                            if 'units' in bins0:
                                bin_units = bins0['units']
                                if type(bin_units) == list:
                                    if type(bin_units[l]) == str:
                                        bin_units = ' [' + bin_units[l] + ']'
                                    elif bin_units[l] is None:
                                        bin_units = ' []'
                                    else:
                                        bin_units = ''
                                else:
                                    if type(bin_units) == str:
                                        bin_units = ' [' + bin_units + ']'
                                    else:
                                        bin_units = ''

                            if 'centers' in bins0:
                                if bins0['centers'][l] is not None:
                                    bin_label = bin_label + ' center = ' \
                                        + str(bins0['centers'][l]) \
                                        + bin_units
                                else:
                                    bin_label = bin_label + ' center = None'

                            if 'ranges' in bins0:
                                if type(bins0['ranges'][l]) == list:
                                    bin_label = bin_label + sep \
                                        + ' range = [' \
                                        + str(bins0['ranges'][l][0]) + ', ' \
                                        + str(bins0['ranges'][l][1]) + ']' \
                                        + bin_units
                                else:
                                    bin_label = bin_label + sep \
                                        + ' range = [None]'

                            if bin_label != '':
                                bin_label = 'bin:' + bin_label

                            col_name = bin_name + '#%d' % l

                        if col_name == '':
                            col_name = 'col #%d' % l
                        if 'label' in meta['parameters'][i]:
                            if type(meta['parameters'][i]['label']) == list:
                                col_name = meta['parameters'][i]['label'][l]

                        if type(units) == list:
                            if len(units) == 1:
                                legendlabels.append(col_name + ' ['
                                                    + units[0] + '] '
                                                    + bin_label)
                            elif type(units[l]) == str:
                                legendlabels.append(col_name + ' ['
                                                    + units[l] + '] '
                                                    + bin_label)
                            elif units[l] is None:
                                legendlabels.append(col_name + ' [] '
                                                    + bin_label)
                            else:
                                legendlabels.append(col_name + ' '
                                                    + bin_label)
                        else:
                            # Units are on y label
                            legendlabels.append(col_name + ' ' + bin_label)

                tsopts['legendlabels'] = legendlabels

            # If xlabel in opts and opts['tsopts'], warn?
            if opts['xlabel'] != '' and 'xlabel' not in opts['tsopts']:
                tsopts['xlabel'] = opts['xlabel']

            tsopts['ylabel'] = ylabel
            if opts['ylabel'] != '' and 'ylabel' not in opts['tsopts']:
                tsopts['ylabel'] = opts['ylabel']

            tsopts['title'] = title
            if opts['title'] != '' and 'title' not in opts['tsopts']:
                tsopts['title'] = opts['title']

            if opts['logx'] is not False and 'logx' not in opts['tsopts']:
                tsopts['logx'] = True
            if opts['logy'] is not False and 'logy' not in opts['tsopts']:
                tsopts['logy'] = True

            # Apply tsopts
            for key, value in opts['tsopts'].items():
                tsopts[key] = value

            with rc_context(rc=opts['rcParams']):
                fig = timeseries(Time, y, **tsopts)

            meta["parameters"][i]['hapiplot']['figure'] = fig

        if opts['saveimage']:
            log('Writing %s' % fnameimg, opts)
            meta["parameters"][i]['hapiplot']['imagefile'] = fnameimg
        else:
            from io import BytesIO
            fnameimg = BytesIO()

        if opts['returnimage']:
            with rc_context(rc=opts['rcParams']):
                fig.canvas.print_figure(fnameimg)

            if opts['saveimage']:
                with open(fnameimg, mode='rb') as f:
                    meta["parameters"][i]['hapiplot']['image'] = f.read()
            else:
                meta["parameters"][i]['hapiplot']['image'] = \
                    fnameimg.getvalue()
        else:
            with rc_context(rc=opts['rcParams']):
                fig.savefig(fnameimg)

        # Two calls to fig.tight_layout() may be needed b/c of bug in PyQt:
        # https://github.com/matplotlib/matplotlib/issues/10361
        if opts['_rcParams']['figure.bbox'] == 'tight':
            fig.tight_layout()

    return meta


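# Hedged usage sketch for hapiplot() with returnimage=True (added example;
# the server/dataset values follow the docstring example, and index 1
# assumes the response contains Time plus one parameter):
#
#   from hapiclient import hapi, hapiplot
#   server = 'http://hapi-server.org/servers/TestData/hapi'
#   data, meta = hapi(server, 'dataset1', 'scalar',
#                     '1970-01-01T00:00:00', '1970-01-02T00:00:00')
#   meta = hapiplot(data, meta, returnimage=True)
#   with open('scalar.png', 'wb') as f:
#       f.write(meta['parameters'][1]['hapiplot']['image'])

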
def hapi(*args, **kwargs):
    """Request data from a HAPI server.

    For additional documentation and demonstration, see
    https://github.com/hapi-server/client-python-notebooks/blob/master/hapi_demo.ipynb

    Version: 0.1.5b3

    Parameters
    ----------
    server : str
        A string with the URL to a HAPI compliant server. (A HAPI URL always
        ends with "/hapi").
    dataset : str
        A string specifying a dataset from a `server`
    parameters : str
        A comma-separated list of parameters in `dataset`
    start : str
        The start time of the requested data
    stop : str
        The end time of the requested data; end times are exclusive - the
        last data record returned by a HAPI server should have a timestamp
        before `stop`.
    options : dict
        `logging` (False) - Log to console
        `cache` (True) - Save responses and processed responses in cachedir
        `cachedir` (./hapi-data)
        `usecache` (True) - Use files in `cachedir` if found
        `server_list` (https://github.com/hapi-server/servers/raw/master/all.txt)

    Returns
    -------
    result : various
        `result` depends on the input parameters.

        servers = hapi() returns a list of available HAPI server URLs from
        https://github.com/hapi-server/servers/raw/master/all.txt

        dataset = hapi(server) returns a dict of datasets available from a
        URL given by the string `server`. The dictionary structure follows
        the HAPI JSON structure.

        parameters = hapi(server, dataset) returns a dictionary of
        parameters in the string `dataset`. The dictionary structure
        follows the HAPI JSON structure.

        metadata = hapi(server, dataset, parameters) returns metadata
        associated with each parameter in the comma-separated string
        `parameters`. The dictionary structure follows the HAPI JSON
        structure.

        data = hapi(server, dataset, parameters, start, stop) returns a
        dictionary with elements corresponding to `parameters`, e.g., if
        `parameters` = 'scalar,vector' and the number of records in the
        time range `start` <= t < `stop` returned is N, then

          data['scalar'] is a NumPy array of shape (N)
          data['vector'] is a NumPy array of shape (N,3)
          data['Time'] is a NumPy array of byte literals with shape (N).

          Byte literal times can be converted to Python datetimes using

            dtarray = hapitime2datetime(data['Time'])

        data, meta = hapi(server, dataset, parameters, start, stop) also
        returns the metadata for parameters in `meta`.

    References
    ----------
    * `HAPI Server Definition <https://github.com/hapi-server/data-specification>`_

    Examples
    --------
    See https://github.com/hapi-server/client-python-notebooks
    """

    nin = len(args)

    if nin > 0:
        SERVER = args[0]
    if nin > 1:
        DATASET = args[1]
    if nin > 2:
        PARAMETERS = args[2]
    if nin > 3:
        START = args[3]
    if nin > 4:
        STOP = args[4]

    # Override defaults
    opts = setopts(hapiopts(), kwargs)

    from hapiclient import __version__
    log('Running hapi.py version %s' % __version__, opts)

    if nin == 0:  # hapi()
        log('Reading %s' % opts['server_list'], opts)
        # decode('utf8') in the following is needed to make Python 2 and 3
        # types match.
        data = urlopen(opts['server_list']).read().decode('utf8').split('\n')
        # Remove empty items (if blank lines)
        data = [x for x in data if x]
        # Display server URLs to console.
        log('List of HAPI servers in %s:\n' % opts['server_list'], opts)
        for url in data:
            log("  %s" % url, opts)
        return data

    if nin == 1:  # hapi(SERVER)
        # TODO: Cache
        url = SERVER + '/catalog'
        log('Reading %s' % url, opts)
        res = urlopen(url)
        meta = jsonparse(res, url)
        return meta

    if nin == 2:  # hapi(SERVER, DATASET)
        # TODO: Cache
        url = SERVER + '/info?id=' + DATASET
        log('Reading %s' % url, opts)
        res = urlopen(url)
        meta = jsonparse(res, url)
        return meta

    if nin == 4:
        error('A stop time is required if a start time is given.')

    if nin == 3 or nin == 5:
        # hapi(SERVER, DATASET, PARAMETERS) or
        # hapi(SERVER, DATASET, PARAMETERS, START, STOP)

        if re.search(r', ', PARAMETERS):
            warning("Removing spaces after commas in given parameter "
                    "list of '" + PARAMETERS + "'")
            PARAMETERS = re.sub(r',\s+', ',', PARAMETERS)

        # urld = url subdirectory of cachedir to store files from SERVER
        urld = cachedir(opts["cachedir"], SERVER)

        if opts["cachedir"]:
            log('file directory = %s' % urld, opts)

        urljson = SERVER + '/info?id=' + DATASET

        # Output from urljson will be saved in a .json file. Parsed json
        # will be stored in a .pkl file. Metadata for all parameters is
        # requested and the response is subsetted so only metadata for
        # PARAMETERS is returned.
        fname_root = request2path(SERVER, DATASET, '', '', '',
                                  opts['cachedir'])
        fnamejson = fname_root + '.json'
        fnamepkl = fname_root + '.pkl'

        if nin == 5:  # Data requested
            # URL to get CSV (will be used if binary response not available)
            urlcsv = SERVER + '/data?id=' + DATASET + '&parameters=' \
                     + PARAMETERS + '&time.min=' + START \
                     + '&time.max=' + STOP
            # URL for binary request
            urlbin = urlcsv + '&format=binary'

            # Raw CSV and HAPI Binary (no header) will be stored in .csv and
            # .bin files. Parsed response of either CSV or HAPI Binary will
            # be stored in a .npy file.
            # fnamepklx will contain additional metadata about the request
            # including d/l time, parsing time, and the location of files.
            fname_root = request2path(SERVER, DATASET, PARAMETERS,
                                      START, STOP, opts['cachedir'])
            fnamecsv = fname_root + '.csv'
            fnamebin = fname_root + '.bin'
            fnamenpy = fname_root + '.npy'
            fnamepklx = fname_root + ".pkl"

        metaFromCache = False
        if opts["usecache"]:
            if nin == 3 and os.path.isfile(fnamepkl):
                # Read cached metadata from .pkl file.
                # This returns subsetted metadata with no additional "x_"
                # information (which is stored in fnamepklx).
                log('Reading %s' % fnamepkl.replace(urld + '/', ''), opts)
                f = open(fnamepkl, 'rb')
                meta = pickle.load(f)
                f.close()
                metaFromCache = True
                # Remove parameters not requested.
                meta = subset(meta, PARAMETERS)
                return meta

            if os.path.isfile(fnamepklx):
                # Read subsetted meta file with x_ information
                log('Reading %s' % fnamepklx.replace(urld + '/', ''), opts)
                f = open(fnamepklx, 'rb')
                meta = pickle.load(f)
                metaFromCache = True
                f.close()

        if not metaFromCache:
            # No cached metadata loaded, so request it from the server.
            log('Reading %s' % urljson.replace(urld + '/', ''), opts)
            res = urlopen(urljson)
            meta = jsonparse(res, urljson)

        # Add information to metadata so we can figure out the request
        # needed to generate it. Will also be used for labeling plots by
        # hapiplot().
        meta.update({"x_server": SERVER})
        meta.update({"x_dataset": DATASET})

        if opts["cache"]:
            if not os.path.exists(urld):
                os.makedirs(urld)

        if opts["cache"] and not metaFromCache:
            # Cache metadata for all parameters if it was not already loaded
            # from cache. Note that fnamepklx is written after data is
            # downloaded and parsed.
            log('Writing %s ' % fnamejson.replace(urld + '/', ''), opts)
            f = open(fnamejson, 'w')
            json.dump(meta, f, indent=4)
            f.close()

            log('Writing %s ' % fnamepkl.replace(urld + '/', ''), opts)
            f = open(fnamepkl, 'wb')
            # protocol=2 used for Python 2.7 compatibility.
            pickle.dump(meta, f, protocol=2)
            f.close()

        # Remove unrequested parameters if they have not already been
        # removed (b/c loaded from cache).
        if not metaFromCache:
            meta = subset(meta, PARAMETERS)

        if nin == 3:
            return meta

        if opts["usecache"] and os.path.isfile(fnamenpy):
            # Read cached data file.
            log('Reading %s ' % fnamenpy.replace(urld + '/', ''), opts)
            f = open(fnamenpy, 'rb')
            data = np.load(f)
            f.close()

            # There is a possibility that the fnamenpy file existed but
            # fnamepklx was not found (b/c removed). In this case, the meta
            # returned will not have all of the "x_" information inserted
            # below. Code that uses this information needs to account for
            # this.
            return data, meta

        cformats = ['csv', 'binary']  # client formats
        if opts['format'] not in cformats:
            # Check if requested format is implemented by this client.
            error('This client does not handle transport format "%s". '
                  'Available options: %s'
                  % (opts['format'], ', '.join(cformats)))

        # See if server supports binary
        if opts['format'] != 'csv':
            log('Reading %s' % (SERVER + '/capabilities'), opts)
            res = urlopen(SERVER + '/capabilities')
            caps = jsonparse(res, SERVER + '/capabilities')
            sformats = caps["outputFormats"]  # Server formats
            if 'format' in kwargs and kwargs['format'] not in sformats:
                warning('Requested transport format "%s" not available '
                        'from %s. Will use "csv". Available options: %s'
                        % (opts['format'], SERVER, ', '.join(sformats)),
                        "hapi")
                opts['format'] = 'csv'
            if 'binary' not in sformats:
                opts['format'] = 'csv'

        ######################################################################
        # Compute data type variable dt used to read HAPI response into
        # a data structure.
        pnames, psizes, dt = [], [], []

        # Each element of cols is an array with start/end column number of
        # a parameter.
        cols = np.zeros([len(meta["parameters"]), 2], dtype=np.int32)
        ss = 0  # running sum of prod(size)

        # missing_length = True will be set if a HAPI String or ISOTime
        # parameter has no length attribute in metadata (length attribute
        # is required for both in binary but only for the primary time
        # column in CSV). When missing_length=True the CSV read gets more
        # complicated.
        missing_length = False

        # Extract sizes and types of parameters.
        for i in range(0, len(meta["parameters"])):

            ptype = str(meta["parameters"][i]["type"])
            pnames.append(str(meta["parameters"][i]["name"]))

            if 'size' in meta["parameters"][i]:
                psizes.append(meta["parameters"][i]['size'])
            else:
                psizes.append(1)

            # For size = [N] case, readers want
            #   dtype = ('name', type, N)
            # not
            #   dtype = ('name', type, [N])
            if type(psizes[i]) is list and len(psizes[i]) == 1:
                psizes[i] = psizes[i][0]

            if type(psizes[i]) is list and len(psizes[i]) > 1:
                # psizes[i] = list(reversed(psizes[i]))
                psizes[i] = list(psizes[i])

            # First column of ith parameter.
            cols[i][0] = ss
            # Last column of ith parameter.
            cols[i][1] = ss + np.prod(psizes[i]) - 1
            # Running sum of columns.
            ss = cols[i][1] + 1

            # HAPI numerical formats are 64-bit LE floating point and
            # 32-bit LE signed integers.
            if ptype == 'double':
                dtype = (pnames[i], '<d', psizes[i])
            if ptype == 'integer':
                dtype = (pnames[i], np.dtype('<i4'), psizes[i])

            if opts['format'] == 'binary':
                # TODO: If 'length' not available, warn and fall back to CSV.
                # Technically, the server response is invalid in this case
                # b/c the length attribute is required for all parameters
                # when format=binary.
                if ptype == 'string' or ptype == 'isotime':
                    dtype = (pnames[i],
                             'S' + str(meta["parameters"][i]["length"]),
                             psizes[i])
            else:
                # When format=csv, the length attribute may not be given
                # (but must be given for the first parameter according to
                # the HAPI spec).
                if ptype == 'string' or ptype == 'isotime':
                    if 'length' in meta["parameters"][i]:
                        # length is specified for parameter in metadata.
                        # Use it.
                        dtype = (pnames[i],
                                 'S' + str(meta["parameters"][i]["length"]),
                                 psizes[i])
                    else:
                        # A string or isotime parameter did not have a
                        # length. Will need to use slower CSV read method.
                        missing_length = True
                        dtype = (pnames[i], object, psizes[i])

            # For testing reader. Force use of slow read method.
            if opts['format'] == 'csv':
                if opts['method'] == 'numpynolength' \
                        or opts['method'] == 'pandasnolength':
                    missing_length = True
                    if ptype == 'string' or ptype == 'isotime':
                        dtype = (pnames[i], object, psizes[i])

            # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version
            if dtype[2] == 1:
                dtype = dtype[0:2]

            dt.append(dtype)

        ######################################################################
        # The length attribute is required for all parameters when serving
        # binary but only for the time parameter when serving CSV. This
        # catches the case where the server provides binary but is missing a
        # length attribute in one or more string parameters that were
        # requested. Note that this will never be true; the code above needs
        # to be updated.
        # if opts['format'] == 'binary' and missing_length:
        #     warnings.warn('Requesting CSV instead of binary because of '
        #                   'problem with server metadata.')
        #     opts['format'] == 'csv'

        # Read the data. toc0 is time to download (or build buffer);
        # toc is time to parse (includes download time if buffered IO is
        # used).
        if opts['format'] == 'binary':
            # HAPI Binary
            if opts["cache"]:
                log('Writing %s to %s'
                    % (urlbin, fnamebin.replace(urld + '/', '')), opts)
                tic0 = time.time()
                urlretrieve(urlbin, fnamebin)
                toc0 = time.time() - tic0

                log('Reading %s' % fnamebin.replace(urld + '/', ''), opts)
                tic = time.time()
                data = np.fromfile(fnamebin, dtype=dt)
                toc = time.time() - tic
            else:
                from io import BytesIO
                log('Creating buffer: %s' % urlbin, opts)
                tic0 = time.time()
                buff = BytesIO(urlopen(urlbin).read())
                toc0 = time.time() - tic0

                log('Parsing buffer.', opts)
                tic = time.time()
                data = np.frombuffer(buff.read(), dtype=dt)
                toc = time.time() - tic
        else:
            # HAPI CSV
            if opts["cache"]:
                log('Saving %s' % urlcsv.replace(urld + '/', ''), opts)
                tic0 = time.time()
                urlretrieve(urlcsv, fnamecsv)
                toc0 = time.time() - tic0
                log('Parsing %s' % fnamecsv.replace(urld + '/', ''), opts)
            else:
                from io import StringIO
                log('Creating buffer: %s'
                    % urlcsv.replace(urld + '/', ''), opts)
                tic0 = time.time()
                fnamecsv = StringIO(urlopen(urlcsv).read().decode())
                toc0 = time.time() - tic0
                log('Parsing buffer.', opts)

            if not missing_length:
                # All string and isotime parameters have a length in
                # metadata.
                tic = time.time()
                if opts['method'] == 'numpy':
                    data = np.genfromtxt(fnamecsv, dtype=dt, delimiter=',')
                    toc = time.time() - tic
                if opts['method'] == 'pandas':
                    # Read file into Pandas DataFrame
                    df = pandas.read_csv(fnamecsv, sep=',', header=None)

                    # Allocate output N-D array (It is not possible to pass
                    # dtype=dt as computed to pandas.read_csv; pandas dtype
                    # is different from numpy's dtype.)
                    data = np.ndarray(shape=(len(df)), dtype=dt)

                    # Insert data from dataframe 'df' columns into N-D
                    # array 'data'
                    for i in range(0, len(pnames)):
                        shape = np.append(len(data), psizes[i])
                        # In numpy 1.8.2 and Python 2.7, this throws an
                        # error for no apparent reason. Works as expected
                        # in numpy 1.10.4.
                        data[pnames[i]] = np.squeeze(
                            np.reshape(
                                df.values[:, np.arange(cols[i][0],
                                                       cols[i][1] + 1)],
                                shape))

                    toc = time.time() - tic
            else:
                # At least one requested string or isotime parameter does
                # not have a length in metadata. More work to do to read.
                tic = time.time()
                if opts['method'] == 'numpy' \
                        or opts['method'] == 'numpynolength':
                    # If requested method was numpy, use numpynolength
                    # method.

                    # With dtype=None, the data type is determined
                    # automatically.
                    table = np.genfromtxt(fnamecsv, dtype=None,
                                          delimiter=',', encoding='utf-8')
                    # table is a 1-D array. Each element is a row in the
                    # file.
                    # - If the data types are not the same for each column,
                    #   the elements are tuples with length equal to the
                    #   number of columns.
                    # - If the data types are the same for each column,
                    #   which will happen if only Time is requested or Time
                    #   and a string or isotime parameter is requested,
                    #   then table has rows that are 1-D numpy arrays.

                    # Contents of 'table' will be placed into N-D array
                    # 'data'.
                    data = np.ndarray(shape=(len(table)), dtype=dt)

                    # Insert data from 'table' into N-D array 'data'
                    if table.dtype.names is None:
                        if len(pnames) == 1:
                            # Only time parameter requested.
                            data[pnames[0]] = table[:]
                        else:
                            # All columns in 'table' have the same data
                            # type, so table is a 2-D numpy matrix.
                            for i in range(0, len(pnames)):
                                shape = np.append(len(data), psizes[i])
                                data[pnames[i]] = np.squeeze(
                                    np.reshape(
                                        table[:, np.arange(cols[i][0],
                                                           cols[i][1] + 1)],
                                        shape))
                    else:
                        # Table is not a 2-D numpy matrix.
                        # Extract each column (don't know how to do this
                        # with slicing notation, e.g., data['varname'] =
                        # table[:][1:3]). Instead, loop over each parameter
                        # (pn) and aggregate columns. Then insert the
                        # aggregated columns into N-D array 'data'.
                        for pn in range(0, len(cols)):
                            shape = np.append(len(data), psizes[pn])
                            for c in range(cols[pn][0], cols[pn][1] + 1):
                                if c == cols[pn][0]:  # New parameter
                                    tmp = table[table.dtype.names[c]]
                                else:  # Aggregate
                                    tmp = np.vstack(
                                        (tmp, table[table.dtype.names[c]]))
                            tmp = np.squeeze(
                                np.reshape(np.transpose(tmp), shape))
                            data[pnames[pn]] = tmp

                if opts['method'] == 'pandas' \
                        or opts['method'] == 'pandasnolength':
                    # If requested method was pandas, use pandasnolength
                    # method.

                    # Read file into Pandas DataFrame
                    df = pandas.read_csv(fnamecsv, sep=',', header=None)

                    # Allocate output N-D array (It is not possible to pass
                    # dtype=dt as computed to pandas.read_csv, so need to
                    # create new N-D array.)
                    data = np.ndarray(shape=(len(df)), dtype=dt)

                    # Insert data from dataframe into N-D array
                    for i in range(0, len(pnames)):
                        shape = np.append(len(data), psizes[i])
                        # In numpy 1.8.2 and Python 2.7, this throws an
                        # error for no apparent reason. Works as expected
                        # in numpy 1.10.4.
                        data[pnames[i]] = np.squeeze(
                            np.reshape(
                                df.values[:, np.arange(cols[i][0],
                                                       cols[i][1] + 1)],
                                shape))

                # Any of the string parameters that do not have an
                # associated length in the metadata will have dtype='O'
                # (object). These parameters must be converted to have a
                # dtype='SN', where N is the maximum string length. N is
                # determined automatically when using astype('<S') (astype
                # uses largest N needed).
                dt2 = []  # Will have dtypes with string lengths calculated.
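                # Illustration (added comment; values are hypothetical):
                #   np.array(['a', 'bc'], dtype=object).astype('<S').dtype
                # evaluates to dtype('S2'); the widest string sets N.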
                for i in range(0, len(pnames)):
                    if data[pnames[i]].dtype == 'O':
                        dtype = (pnames[i],
                                 str(data[pnames[i]].astype('<S').dtype),
                                 psizes[i])
                    else:
                        dtype = dt[i]

                    # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version
                    if len(dtype) > 2 and dtype[2] == 1:
                        dtype = dtype[0:2]

                    dt2.append(dtype)

                # Create new N-D array that won't have any parameters with
                # type = 'O'.
                data2 = np.ndarray(data.shape, dt2)

                for i in range(0, len(pnames)):
                    if data[pnames[i]].dtype == 'O':
                        data2[pnames[i]] = data[pnames[i]].astype(dt2[i][1])
                    else:
                        data2[pnames[i]] = data[pnames[i]]
                        # Save memory by not copying (does this help?)
                        # data2[pnames[i]] = np.array(data[pnames[i]],
                        #                             copy=False)

                toc = time.time() - tic

        # Extra metadata associated with the request will be saved in
        # a pkl file with the same base name as the npy data file.
        meta.update({"x_server": SERVER})
        meta.update({"x_dataset": DATASET})
        meta.update({"x_parameters": PARAMETERS})
        meta.update({"x_time.min": START})
        meta.update({"x_time.max": STOP})
        meta.update({"x_requestDate": datetime.now().isoformat()[0:19]})
        meta.update({"x_cacheDir": urld})
        meta.update({"x_downloadTime": toc0})
        meta.update({"x_readTime": toc})
        meta.update({"x_metaFileParsed": fnamepkl})
        meta.update({"x_dataFileParsed": fnamenpy})
        meta.update({"x_metaFile": fnamejson})
        if opts['format'] == 'binary':
            meta.update({"x_dataFile": fnamebin})
        else:
            meta.update({"x_dataFile": fnamecsv})

        # Note that this should technically only be written if cache=True.
        # Will do this when output is
        #   h = hapi(...)
        #   h.data
        #   h.meta
        #   h.info

        # Create cache directory
        if not os.path.exists(opts["cachedir"]):
            os.makedirs(opts["cachedir"])
        if not os.path.exists(urld):
            os.makedirs(urld)

        log('Writing %s' % fnamepklx, opts)
        f = open(fnamepklx, 'wb')
        pickle.dump(meta, f, protocol=2)
        f.close()

        if opts["cache"]:
            log('Writing %s' % fnamenpy, opts)
            if missing_length:
                np.save(fnamenpy, data2)
            else:
                np.save(fnamenpy, data)

        if missing_length:
            return data2, meta
        else:
            return data, meta


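# Hedged usage sketch for hapi() data access (added example; the server and
# parameter names follow the docstring example):
#
#   from hapiclient import hapi, hapitime2datetime
#   server = 'http://hapi-server.org/servers/TestData/hapi'
#   data, meta = hapi(server, 'dataset1', 'scalar,vector',
#                     '1970-01-01T00:00:00', '1970-01-02T00:00:00')
#   data['vector'].shape                 # (N, 3)
#   T = hapitime2datetime(data['Time'])  # byte-literal times -> datetimes

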
def gallery(*args, **kwargs):
    """Create a web-browsable gallery of plots (aka "PNG Walk").

    Experimental code. Requires hapiplotserver. Use

        pip install 'git+https://github.com/hapi-server/plotserver-python'

    For additional documentation and demonstration, see hapi_demo.ipynb at
    <https://github.com/hapi-server/client-python-notebooks/>

    Usage
    -----
    gallery(server, dataset)

    gallery(server, dataset, parameter)

    Examples
    --------
        >>> from hapiclient import gallery
        >>> gallery('http://hapi-server.org/servers/TestData/hapi',
        ...         'dataset1')
        # Webpage tab opens

        >>> from hapiclient import gallery
        >>> gallery('http://hapi-server.org/servers/TestData/hapi',
        ...         'dataset1', 'vector')
        # Webpage tab opens

    Parameters
    ----------
    server : str
        A URL for a HAPI-compliant server. (A HAPI URL always ends with
        "/hapi".)
    dataset : str
        A dataset from a HAPI server. The valid datasets can be determined
        using `hapi(server)`.
    parameter : str
        A parameter in dataset. The valid parameters can be determined
        using `hapi(server, dataset)`.

    Returns
    -------
    None (a new tab is opened in the user's default browser)
    """

    import time
    import webbrowser
    from multiprocessing import Process

    from hapiclient.hapi import cachedir
    from hapiclient.util import error, warning, setopts, prompt
    from hapiplotserver import hapiplotserver

    if len(args) != 2 and len(args) != 3:
        error('Number of arguments must be 2 or 3. See help(gallery).')

    server = args[0]
    dataset = args[1]

    if len(args) == 3:
        parameters = args[2].split(",")
    else:
        parameters = ['']

    if len(parameters) > 1:
        # Eventually, multiple parameters will result in a stack plot.
        warning('Multiple parameters given; only first will be shown.')

    parameters = parameters[0]

    if not all(type(arg) is str for arg in args):
        error('All inputs must be strings. See help(gallery).')

    # Default options
    opts = {
        'cache_dir': cachedir(),
        'usecache': True,
        'port': 5002,
        'format': 'png',
        'figsize': (7, 3),
        'dpi': 144,
        'transparent': True,
        'loglevel': 'default'
    }

    # Override defaults
    opts = setopts(opts, kwargs)

    if not parameters == '':
        paramopt = "&parameters=" + parameters
    else:
        paramopt = ''

    url = 'http://127.0.0.1:' + str(opts['port'])
    url = url + '/?server=' + server
    url = url + '&id=' + dataset
    url = url + paramopt
    url = url + '&format=gallery'

    try:
        process = Process(target=hapiplotserver, kwargs=opts)
        process.start()
    except Exception as e:
        print(e)
        print("Terminating server.")
        process.terminate()

    print(" * Opening ViViz in browser in 1 second.")
    time.sleep(1)
    webbrowser.open(url, new=2)

    prompt("\n\033[0;34mPress a key at any time to terminate "
           "ViViz gallery server.\033[0m\n\n")
    process.terminate()
    print("ViViz gallery server has terminated.")
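
# Hedged usage sketch for gallery() option overrides (added example; the
# port and format values are illustrative, overriding the defaults defined
# above via setopts()):
#
#   from hapiclient import gallery
#   gallery('http://hapi-server.org/servers/TestData/hapi', 'dataset1',
#           'vector', port=5003, format='svg')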