def hapitime2datetime(Time, **kwargs):
    """Convert HAPI timestamps to Python datetimes.

    A HAPI-compliant server represents time as an ISO 8601 string (with
    several constraints - see the `HAPI specification
    <https://github.com/hapi-server/data-specification/blob/master/hapi-dev/HAPI-data-access-spec-dev.md#representation-of-time>`_).
    hapi() reads these timestamps into a NumPy array of Python byte literals.
    This function converts the byte literals to Python datetime objects.

    Typical usage:
        data = hapi(...) # Get data
        DateTimes = hapitime2datetime(data['Time']) # Convert

    Parameters
    ----------
    Time:
        - A NumPy array of HAPI timestamp byte literals
        - A NumPy array of HAPI timestamp strings
        - A list of HAPI timestamp byte literals
        - A list of HAPI timestamp strings
        - A HAPI timestamp byte literal
        - A HAPI timestamp string

    Returns
    -------
    A NumPy array of Python datetime objects with length = len(Time).

    Examples
    --------
    All of the following return

        array([datetime.datetime(1970, 1, 1, 0, 0)], dtype=object)

        from hapiclient.hapi import hapitime2datetime
        import numpy as np

        hapitime2datetime(np.array([b'1970-01-01T00:00:00.000Z']))
        hapitime2datetime(np.array(['1970-01-01T00:00:00.000Z']))
        hapitime2datetime([b'1970-01-01T00:00:00.000Z'])
        hapitime2datetime(['1970-01-01T00:00:00.000Z'])
        hapitime2datetime(b'1970-01-01T00:00:00.000Z')
        hapitime2datetime('1970-01-01T00:00:00.000Z')
    """

    from datetime import datetime

    try:
        # Python 2
        import pytz
        tzinfo = pytz.UTC
    except ImportError:
        # Python 3
        from datetime import timezone
        tzinfo = timezone.utc

    opts = {'logging': False}
    opts = setopts(opts, kwargs)

    if type(Time) == list:
        Time = np.asarray(Time)
    if type(Time) == str or type(Time) == bytes:
        Time = np.asarray([Time])
    if type(Time) != np.ndarray:
        error('Problem with time data.' + '\n')
        return

    if Time.size == 0:
        error('Time array is empty.' + '\n')
        return

    reshape = False
    if Time.shape[0] != Time.size:
        reshape = True
        shape = Time.shape
        Time = Time.flatten()

    if type(Time[0]) == np.bytes_:
        try:
            Time = Time.astype('U')
        except:
            error('Problem with time data. First value: ' + str(Time[0]) + '\n')
            return

    tic = time.time()

    try:
        # Will fail if no pandas, if YYYY-DOY format, or for other valid
        # ISO 8601 dates such as 2001-01-01T00:00:03.Z
        # When infer_datetime_format is used, a Timestamp object is returned.
        # When format=... is used, a datetime object is returned.
        Time = pandas.to_datetime(Time, infer_datetime_format=True).to_pydatetime()
        if reshape:
            Time = np.reshape(Time, shape)
        toc = time.time() - tic
        log("Pandas processing time = %.4fs, Input = %s\n" % (toc, Time[0]), opts)
        return Time
    except:
        pass

    # Convert from Python byte literals to unicode strings:
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.astype.html
    # https://www.b-list.org/weblog/2017/sep/05/how-python-does-unicode/
    # Note that the new Time variable requires 4x more memory.
    Time = Time.astype('U')
    # Could save memory at the cost of speed by decoding at each iteration
    # below, e.g., Time[i] -> Time[i].decode('utf-8')

    d = 0
    # Catch case where there is no trailing Z.
    # Technically, HAPI ISO 8601 must have a trailing Z:
    # https://github.com/hapi-server/data-specification/blob/master/hapi-dev/HAPI-data-access-spec-dev.md#representation-of-time
    if not re.match(r".*Z$", Time[0]):
        d = 1

    pythonDateTime = np.empty(len(Time), dtype=object)

    # Parse date part.
    # If h=True, then hour given.
    # If hm=True, then hour and minute given.
    # If hms=True, then hour, minute, and second given.
    (h, hm, hms) = (False, False, False)

    if len(Time[0]) == 4 or (len(Time[0]) == 5 and Time[0][-1] == "Z"):
        fmt = '%Y'
        to = 5
    elif re.match(r"[0-9]{4}-[0-9]{3}", Time[0]):
        # YYYY-DOY format
        fmt = "%Y-%j"
        to = 9
        if len(Time[0]) >= 12 - d:
            h = True
        if len(Time[0]) >= 15 - d:
            hm = True
        if len(Time[0]) >= 18 - d:
            hms = True
    elif re.match(r"[0-9]{4}-[0-9]{2}", Time[0]):
        # YYYY-MM or YYYY-MM-DD format
        fmt = "%Y-%m"
        to = 8
        if len(Time[0]) > 8:
            fmt = fmt + "-%d"
            to = 11
            if len(Time[0]) >= 14 - d:
                h = True
            if len(Time[0]) >= 17 - d:
                hm = True
            if len(Time[0]) >= 20 - d:
                hms = True
    else:
        # TODO: Also check for invalid time string lengths.
        # Should use JSON schema regular expressions for allowed versions
        # of ISO 8601.
        error('First time value %s is not a valid HAPI Time' % Time[0])

    fmto = fmt
    if h:
        fmt = fmt + "T%H"
    if hm:
        fmt = fmt + ":%M"
    if hms:
        fmt = fmt + ":%S"

    if re.match(r".*\.[0-9].*$", Time[0]):
        fmt = fmt + ".%f"
    if re.match(r".*\.$", Time[0]) or re.match(r".*\.Z$", Time[0]):
        fmt = fmt + "."
    if re.match(r".*Z$", Time[0]):
        fmt = fmt + "Z"

    # TODO: Why not use pandas.to_datetime here with fmt?
    try:
        for i in range(0, len(Time)):
            pythonDateTime[i] = datetime.strptime(Time[i], fmt).replace(tzinfo=tzinfo)
    except:
        error('Could not parse time value ' + Time[i] + ' using ' + fmt)

    toc = time.time() - tic
    log("Manual processing time = %.4fs, Input = %s, fmto = %s, fmt = %s\n"
        % (toc, Time[0], fmto, fmt), opts)

    if reshape:
        pythonDateTime = np.reshape(pythonDateTime, shape)

    return pythonDateTime
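
# A minimal usage sketch for hapitime2datetime(). The timestamps below are
# illustrative HAPI-compliant values (any ISO 8601 string meeting the HAPI
# constraints works); this helper is a demonstration, not part of the API.
def _hapitime2datetime_example():
    import numpy as np
    from hapiclient.hapi import hapitime2datetime

    # Equivalent inputs: NumPy byte-literal array, plain string scalar.
    print(hapitime2datetime(np.array([b'1970-01-01T00:00:00.000Z'])))
    print(hapitime2datetime('1970-01-01T00:00:00.000Z'))

    # Reduced-resolution and day-of-year (YYYY-DOY) forms are also parsed.
    print(hapitime2datetime(['1970-001Z', '1970-002Z']))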
def autoplot(server, dataset, parameters, start, stop, **kwargs):
    """Plot data from a HAPI server using Autoplot.

    If autoplot.jar is not found in the cache, it is downloaded and launched.
    If found, autoplot.jar is updated if the server version is newer than the
    cached version.

    Example
    -------
    >>> from hapiclient import autoplot
    >>> server = 'http://hapi-server.org/servers/TestData2.0/hapi'
    >>> autoplot(server, 'dataset1', 'scalar,vector', '1970-01-01', '1970-01-02')

    The Autoplot application launches or its canvas is updated.

    The options are the same as those for `hapiplot` with the addition of
    the kwargs

        stack : bool [False]
            Create a stack plot of parameters.

        port : int [8079]
            The port number to use to connect to Autoplot.

        version : string ['devel']
            The version of Autoplot to use. Can be a version string, e.g.,
            'v2018a_11', 'devel', 'latest', or 'nightly'. See
            <http://autoplot.org/developer#Development_Versions> for a
            description of the difference between versions.
    """

    import os
    import re
    import platform
    import subprocess

    from hapiclient.util import setopts, log, urlopen, urlretrieve, urlquote
    from hapiclient.hapi import cachedir

    opts = {
                'logging': False,
                'cache': True,
                'cachedir': cachedir(),
                'usecache': False,
                'newwindow': False,
                'version': 'devel',
                'port': 8079
           }

    # Override defaults
    opts = setopts(opts, kwargs)

    autoplotserver = "http://localhost:" + str(opts['port']) + "/"

    url = server + "?id=" + dataset + "&parameters=" + parameters
    url = url + "&timerange=" + start + "/" + stop

    serverrunning = False
    try:
        # See if server needs to be started.
        if opts['logging']:
            log('Trying test. Requesting ' + autoplotserver, opts)
        f = urlopen(autoplotserver)
        res = f.read().decode('utf-8')
        if res.startswith('OK'):
            log('Server running.', opts)
            serverrunning = True
        else:
            log('Server responding but with wrong response to test.', opts)
        f.close()
    except:
        log('Server not running. Will start server.', opts)

    print(url)

    if serverrunning:
        # Send request to update GUI.
        try:
            # This won't detect if the version requested matches
            # the version running.
            rurl = autoplotserver + "?uri=" + urlquote("vap+hapi:" + url)
            if opts['logging']:
                print("autoplot(): Requesting " + rurl)
            log('Autoplot GUI should be updating.', opts)
            f = urlopen(rurl)
            res = f.read().decode('utf-8')
            if res.startswith('OK'):
                log('Request successful. Autoplot GUI updated.', opts)
                f.close()
                return
            else:
                f.close()
                log('Request unsuccessful.', opts)
                serverrunning = False
        except Exception as e:
            print(e)

    # Request was sent, so return.
    if serverrunning == True:
        return

    if opts['version'] == 'nightly':
        jarurl = 'https://ci-pw.physics.uiowa.edu/job/autoplot-release/lastSuccessfulBuild/artifact/autoplot/Autoplot/dist/autoplot.jar'
    elif opts['version'] == 'devel':
        jarurl = 'http://autoplot.org/jnlp/devel/autoplot.jar'
    elif opts['version'].startswith('v'):
        jarurl = 'http://autoplot.org/jnlp/' + opts['version'] + '/autoplot.jar'
    else:
        opts['version'] = 'latest'
        jarurl = 'http://autoplot.org/jnlp/latest/autoplot.jar'

    try:
        result = subprocess.check_output('java -version', shell=True,
                                         stderr=subprocess.STDOUT)
        version = re.sub(r'.*"(.*)".*', r'\1', result.decode().split('\n')[0])
        log("Java version: " + version, opts)
    except:
        # TODO: Automatically download and extract from https://jdk.java.net/14/?
        log("Java is required. See https://www.java.com/en/download/ "
            "or https://jdk.java.net/14/", opts)
        return

    jydir = os.path.dirname(os.path.realpath(__file__))
    jarpath = os.path.join(opts['cachedir'], 'jar/autoplot-' + opts['version'] + '.jar')
    jaricon = os.path.join(jydir, 'autoplot.png')

    # Download jar file if needed.
    log('Checking if autoplot.jar needs to be downloaded or updated.', opts)
    urlretrieve(jarurl, jarpath, check_last_modified=True, **opts)
    #download(jarpath, jarurl, **opts)

    com = "java"
    if 'darwin' in platform.platform().lower():
        com = com + " -Xdock:icon=" + jaricon
        com = com + ' -Xdock:name="Autoplot"'

    com = com + " -DPORT=" + str(opts['port'])
    com = com + " -DHAPI_DATA=" + opts['cachedir']
    com = com + " -DhapiServerCache=true"
    com = com + " -jar " + jarpath
    com = com + " --noAskParams"
    com = com + " '" + os.path.join(jydir, 'server.jy?uri=')
    com = com + urlquote("vap+hapi:" + url) + "'"
    com = com + " &"

    if opts['logging']:
        log("Executing " + com, opts)

    os.system(com)
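
# A minimal usage sketch for autoplot(), mirroring the docstring example.
# Requires Java and a network connection, and launches (or updates) the
# Autoplot GUI, so it is intended for interactive sessions only.
def _autoplot_example():
    from hapiclient import autoplot

    server = 'http://hapi-server.org/servers/TestData2.0/hapi'
    # Uses the default Autoplot server port documented above.
    autoplot(server, 'dataset1', 'scalar,vector',
             '1970-01-01', '1970-01-02', port=8079, logging=True)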
def hapi(*args, **kwargs):
    """Request data from a HAPI server.

    For additional documentation and demonstration, see
    https://github.com/hapi-server/client-python-notebooks/blob/master/hapi_demo.ipynb

    Version: 0.1.5b3

    Parameters
    ----------
    server : str
        A string with the URL to a HAPI compliant server. (A HAPI URL always
        ends with "/hapi".)
    dataset : str
        A string specifying a dataset from a `server`.
    parameters : str
        A comma-separated list of parameters in `dataset`.
    start : str
        The start time of the requested data.
    stop : str
        The end time of the requested data; end times are exclusive - the
        last data record returned by a HAPI server should have a timestamp
        before `stop`.
    options : dict
        `logging` (False) - Log to console
        `cache` (True) - Save responses and processed responses in `cachedir`
        `cachedir` (./hapi-data)
        `usecache` (True) - Use files in `cachedir` if found
        `server_list` (https://github.com/hapi-server/servers/raw/master/all.txt)

    Returns
    -------
    result : various
        `result` depends on the input parameters.

        servers = hapi() returns a list of available HAPI server URLs read
        from the URL given by the `server_list` option.

        dataset = hapi(server) returns a dict of datasets available from a
        URL given by the string `server`. The dictionary structure follows
        the HAPI JSON structure.

        parameters = hapi(server, dataset) returns a dictionary of
        parameters in the string `dataset`. The dictionary structure follows
        the HAPI JSON structure.

        metadata = hapi(server, dataset, parameters) returns metadata
        associated with each parameter in the comma-separated string
        `parameters`. The dictionary structure follows the HAPI JSON
        structure.

        data = hapi(server, dataset, parameters, start, stop) returns a
        dictionary with elements corresponding to `parameters`, e.g., if
        `parameters` = 'scalar,vector' and the number of records in the time
        range `start` <= t < `stop` returned is N, then

          data['scalar'] is a NumPy array of shape (N)
          data['vector'] is a NumPy array of shape (N,3)
          data['Time'] is a NumPy array of byte literals with shape (N).

          Byte literal times can be converted to Python datetimes using

            dtarray = hapitime2datetime(data['Time'])

        data, meta = hapi(server, dataset, parameters, start, stop) also
        returns the metadata for the parameters in `meta`.

    References
    ----------
    * `HAPI Server Definition <https://github.com/hapi-server/data-specification>`_

    Examples
    --------
    See https://github.com/hapi-server/client-python-notebooks
    """

    nin = len(args)

    if nin > 0:
        SERVER = args[0]
    if nin > 1:
        DATASET = args[1]
    if nin > 2:
        PARAMETERS = args[2]
    if nin > 3:
        START = args[3]
    if nin > 4:
        STOP = args[4]

    # Override defaults
    opts = setopts(hapiopts(), kwargs)

    from hapiclient import __version__
    log('Running hapi.py version %s' % __version__, opts)

    if nin == 0:  # hapi()
        log('Reading %s' % opts['server_list'], opts)
        # decode('utf8') in the following is needed to make Python 2 and 3
        # types match.
        data = urlopen(opts['server_list']).read().decode('utf8').split('\n')
        data = [x for x in data if x]  # Remove empty items (if blank lines)
        # Display server URLs to console.
        log('List of HAPI servers in %s:\n' % opts['server_list'], opts)
        for url in data:
            log(" %s" % url, opts)
        return data

    if nin == 1:  # hapi(SERVER)
        # TODO: Cache
        url = SERVER + '/catalog'
        log('Reading %s' % url, opts)
        res = urlopen(url)
        meta = jsonparse(res, url)
        return meta

    if nin == 2:  # hapi(SERVER, DATASET)
        # TODO: Cache
        url = SERVER + '/info?id=' + DATASET
        log('Reading %s' % url, opts)
        res = urlopen(url)
        meta = jsonparse(res, url)
        return meta

    if nin == 4:
        error('A stop time is required if a start time is given.')

    if nin == 3 or nin == 5:
        # hapi(SERVER, DATASET, PARAMETERS) or
        # hapi(SERVER, DATASET, PARAMETERS, START, STOP)

        if re.search(r', ', PARAMETERS):
            warning("Removing spaces after commas in given parameter "
                    "list of '" + PARAMETERS + "'")
            PARAMETERS = re.sub(r',\s+', ',', PARAMETERS)

        # urld = url subdirectory of cachedir to store files from SERVER
        urld = cachedir(opts["cachedir"], SERVER)

        if opts["cachedir"]:
            log('file directory = %s' % urld, opts)

        urljson = SERVER + '/info?id=' + DATASET

        # Output from urljson will be saved in a .json file. Parsed json
        # will be stored in a .pkl file. Metadata for all parameters is
        # requested and the response is subsetted so that only metadata for
        # PARAMETERS is returned.
        fname_root = request2path(SERVER, DATASET, '', '', '', opts['cachedir'])
        fnamejson = fname_root + '.json'
        fnamepkl = fname_root + '.pkl'

        if nin == 5:  # Data requested
            # URL to get CSV (will be used if binary response is not available)
            urlcsv = SERVER + '/data?id=' + DATASET + '&parameters=' + \
                     PARAMETERS + '&time.min=' + START + '&time.max=' + STOP
            # URL for binary request
            urlbin = urlcsv + '&format=binary'

            # Raw CSV and HAPI Binary (no header) will be stored in .csv and
            # .bin files. The parsed response of either CSV or HAPI Binary
            # will be stored in a .npy file.

            # fnamepklx will contain additional metadata about the request,
            # including the download time, parsing time, and the location of
            # files.
            fname_root = request2path(SERVER, DATASET, PARAMETERS, START,
                                      STOP, opts['cachedir'])
            fnamecsv = fname_root + '.csv'
            fnamebin = fname_root + '.bin'
            fnamenpy = fname_root + '.npy'
            fnamepklx = fname_root + ".pkl"

        metaFromCache = False
        if opts["usecache"]:
            if nin == 3 and os.path.isfile(fnamepkl):
                # Read cached metadata from .pkl file.
                # This returns subsetted metadata with no additional "x_"
                # information (which is stored in fnamepklx).
                log('Reading %s' % fnamepkl.replace(urld + '/', ''), opts)
                f = open(fnamepkl, 'rb')
                meta = pickle.load(f)
                f.close()
                metaFromCache = True
                # Remove parameters not requested.
                meta = subset(meta, PARAMETERS)
                return meta
            if os.path.isfile(fnamepklx):
                # Read subsetted meta file with x_ information.
                log('Reading %s' % fnamepklx.replace(urld + '/', ''), opts)
                f = open(fnamepklx, 'rb')
                meta = pickle.load(f)
                metaFromCache = True
                f.close()

        if not metaFromCache:
            # No cached metadata loaded, so request it from the server.
            log('Reading %s' % urljson.replace(urld + '/', ''), opts)
            res = urlopen(urljson)
            meta = jsonparse(res, urljson)

        # Add information to metadata so we can figure out the request
        # needed to generate it. Will also be used for labeling plots by
        # hapiplot().
        meta.update({"x_server": SERVER})
        meta.update({"x_dataset": DATASET})

        if opts["cache"]:
            if not os.path.exists(urld):
                os.makedirs(urld)

        if opts["cache"] and not metaFromCache:
            # Cache metadata for all parameters if it was not already loaded
            # from cache. Note that fnamepklx is written after the data is
            # downloaded and parsed.
            log('Writing %s ' % fnamejson.replace(urld + '/', ''), opts)
            f = open(fnamejson, 'w')
            json.dump(meta, f, indent=4)
            f.close()

            log('Writing %s ' % fnamepkl.replace(urld + '/', ''), opts)
            f = open(fnamepkl, 'wb')
            # protocol=2 used for Python 2.7 compatibility.
            pickle.dump(meta, f, protocol=2)
            f.close()

        # Remove unrequested parameters if they have not already been
        # removed (b/c loaded from cache).
        if not metaFromCache:
            meta = subset(meta, PARAMETERS)

        if nin == 3:
            return meta

        if opts["usecache"] and os.path.isfile(fnamenpy):
            # Read cached data file.
            log('Reading %s ' % fnamenpy.replace(urld + '/', ''), opts)
            f = open(fnamenpy, 'rb')
            data = np.load(f)
            f.close()
            # There is a possibility that the fnamenpy file existed but
            # fnamepklx was not found (b/c removed). In this case, the meta
            # returned will not have all of the "x_" information inserted
            # below. Code that uses this information needs to account for
            # this.
            return data, meta

        cformats = ['csv', 'binary']  # Client formats
        if not opts['format'] in cformats:
            # Check if requested format is implemented by this client.
            error('This client does not handle transport '
                  'format "%s". Available options: %s'
                  % (opts['format'], ', '.join(cformats)))

        # See if the server supports binary.
        if opts['format'] != 'csv':
            log('Reading %s' % (SERVER + '/capabilities'), opts)
            res = urlopen(SERVER + '/capabilities')
            caps = jsonparse(res, SERVER + '/capabilities')
            sformats = caps["outputFormats"]  # Server formats
            if 'format' in kwargs and not kwargs['format'] in sformats:
                warning("hapi",
                        'Requested transport format "%s" not available '
                        'from %s. Will use "csv". Available options: %s'
                        % (opts['format'], SERVER, ', '.join(sformats)))
                opts['format'] = 'csv'
            if not 'binary' in sformats:
                opts['format'] = 'csv'

        ##################################################################
        # Compute data type variable dt used to read HAPI response into
        # a data structure.
        pnames, psizes, dt = [], [], []
        # Each element of cols is an array with the start/end column number
        # of a parameter.
        cols = np.zeros([len(meta["parameters"]), 2], dtype=np.int32)
        ss = 0  # running sum of prod(size)

        # missing_length=True will be set if a HAPI String or ISOTime
        # parameter has no length attribute in the metadata (the length
        # attribute is required for both in binary, but only for the primary
        # time column in CSV). When missing_length=True, the CSV read gets
        # more complicated.
        missing_length = False

        # Extract sizes and types of parameters.
        for i in range(0, len(meta["parameters"])):
            ptype = str(meta["parameters"][i]["type"])
            pnames.append(str(meta["parameters"][i]["name"]))
            if 'size' in meta["parameters"][i]:
                psizes.append(meta["parameters"][i]['size'])
            else:
                psizes.append(1)

            # For the size = [N] case, readers want
            #   dtype = ('name', type, N)
            # not
            #   dtype = ('name', type, [N])
            if type(psizes[i]) is list and len(psizes[i]) == 1:
                psizes[i] = psizes[i][0]

            if type(psizes[i]) is list and len(psizes[i]) > 1:
                #psizes[i] = list(reversed(psizes[i]))
                psizes[i] = list(psizes[i])

            # First column of ith parameter.
            cols[i][0] = ss
            # Last column of ith parameter.
            cols[i][1] = ss + np.prod(psizes[i]) - 1
            # Running sum of columns.
            ss = cols[i][1] + 1

            # HAPI numerical formats are 64-bit LE floating point and
            # 32-bit LE signed integers.
            if ptype == 'double':
                dtype = (pnames[i], '<d', psizes[i])
            if ptype == 'integer':
                dtype = (pnames[i], np.dtype('<i4'), psizes[i])

            if opts['format'] == 'binary':
                # TODO: If 'length' not available, warn and fall back to CSV.
                # Technically, the server response is invalid in this case
                # b/c the length attribute is required for all parameters
                # when format=binary.
                if ptype == 'string' or ptype == 'isotime':
                    dtype = (pnames[i],
                             'S' + str(meta["parameters"][i]["length"]),
                             psizes[i])
            else:
                # When format=csv, the length attribute may not be given
                # (but must be given for the first parameter according to
                # the HAPI spec).
                if ptype == 'string' or ptype == 'isotime':
                    if 'length' in meta["parameters"][i]:
                        # length is specified for parameter in metadata.
                        # Use it.
                        dtype = (pnames[i],
                                 'S' + str(meta["parameters"][i]["length"]),
                                 psizes[i])
                    else:
                        # A string or isotime parameter did not have a
                        # length. Will need to use the slower CSV read
                        # method.
                        missing_length = True
                        dtype = (pnames[i], object, psizes[i])

            # For testing reader. Force use of slow read method.
            if opts['format'] == 'csv':
                if opts['method'] == 'numpynolength' or opts['method'] == 'pandasnolength':
                    missing_length = True
                    if ptype == 'string' or ptype == 'isotime':
                        dtype = (pnames[i], object, psizes[i])

            # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version
            if dtype[2] == 1:
                dtype = dtype[0:2]

            dt.append(dtype)
        ##################################################################

        # The length attribute is required for all parameters when serving
        # binary but is only required for the time parameter when serving
        # CSV. This would catch the case where the server provides binary
        # but is missing a length attribute in one or more string parameters
        # that were requested. Note that this condition will never be true.
        # Need to update code above.
        # if opts['format'] == 'binary' and missing_length:
        #     warnings.warn('Requesting CSV instead of binary because of '
        #                   'problem with server metadata.')
        #     opts['format'] == 'csv'

        # Read the data. toc0 is the time to download (or build a buffer);
        # toc is the time to parse (includes download time if buffered IO is
        # used).
        if opts['format'] == 'binary':
            # HAPI Binary
            if opts["cache"]:
                log('Writing %s to %s'
                    % (urlbin, fnamebin.replace(urld + '/', '')), opts)
                tic0 = time.time()
                urlretrieve(urlbin, fnamebin)
                toc0 = time.time() - tic0

                log('Reading %s' % fnamebin.replace(urld + '/', ''), opts)
                tic = time.time()
                data = np.fromfile(fnamebin, dtype=dt)
                toc = time.time() - tic
            else:
                from io import BytesIO
                log('Creating buffer: %s' % urlbin, opts)
                tic0 = time.time()
                buff = BytesIO(urlopen(urlbin).read())
                toc0 = time.time() - tic0

                log('Parsing buffer.', opts)
                tic = time.time()
                data = np.frombuffer(buff.read(), dtype=dt)
                toc = time.time() - tic
        else:
            # HAPI CSV
            if opts["cache"]:
                log('Saving %s' % urlcsv.replace(urld + '/', ''), opts)
                tic0 = time.time()
                urlretrieve(urlcsv, fnamecsv)
                toc0 = time.time() - tic0
                log('Parsing %s' % fnamecsv.replace(urld + '/', ''), opts)
            else:
                from io import StringIO
                log('Creating buffer: %s' % urlcsv.replace(urld + '/', ''), opts)
                tic0 = time.time()
                fnamecsv = StringIO(urlopen(urlcsv).read().decode())
                toc0 = time.time() - tic0
                log('Parsing buffer.', opts)

            if not missing_length:
                # All string and isotime parameters have a length in the
                # metadata.
                tic = time.time()
                if opts['method'] == 'numpy':
                    data = np.genfromtxt(fnamecsv, dtype=dt, delimiter=',')
                    toc = time.time() - tic
                if opts['method'] == 'pandas':
                    # Read file into Pandas DataFrame.
                    df = pandas.read_csv(fnamecsv, sep=',', header=None)

                    # Allocate output N-D array. (It is not possible to pass
                    # dtype=dt as computed to pandas.read_csv; the pandas
                    # dtype is different from numpy's dtype.)
                    data = np.ndarray(shape=(len(df)), dtype=dt)

                    # Insert data from dataframe 'df' columns into N-D array
                    # 'data'.
                    for i in range(0, len(pnames)):
                        shape = np.append(len(data), psizes[i])
                        # In numpy 1.8.2 and Python 2.7, this throws an error
                        # for no apparent reason. Works as expected in numpy
                        # 1.10.4.
                        data[pnames[i]] = np.squeeze(
                            np.reshape(
                                df.values[:, np.arange(cols[i][0], cols[i][1] + 1)],
                                shape))
                    toc = time.time() - tic
            else:
                # At least one requested string or isotime parameter does not
                # have a length in the metadata. More work to do to read.
                tic = time.time()
                if opts['method'] == 'numpy' or opts['method'] == 'numpynolength':
                    # If the requested method was numpy, use the
                    # numpynolength method.

                    # With dtype=None, the data type is determined
                    # automatically.
                    table = np.genfromtxt(fnamecsv, dtype=None, delimiter=',',
                                          encoding='utf-8')
                    # table is a 1-D array. Each element is a row in the file.
                    # - If the data types are not the same for each column,
                    #   the elements are tuples with length equal to the
                    #   number of columns.
                    # - If the data types are the same for each column, which
                    #   will happen if only Time is requested, or Time and
                    #   a string or isotime parameter is requested, then
                    #   table has rows that are 1-D numpy arrays.

                    # Contents of 'table' will be placed into N-D array
                    # 'data'.
                    data = np.ndarray(shape=(len(table)), dtype=dt)

                    # Insert data from 'table' into N-D array 'data'.
                    if table.dtype.names is None:
                        if len(pnames) == 1:
                            # Only time parameter requested.
                            data[pnames[0]] = table[:]
                        else:
                            # All columns in 'table' have the same data type,
                            # so table is a 2-D numpy matrix.
                            for i in range(0, len(pnames)):
                                shape = np.append(len(data), psizes[i])
                                data[pnames[i]] = np.squeeze(
                                    np.reshape(
                                        table[:, np.arange(cols[i][0], cols[i][1] + 1)],
                                        shape))
                    else:
                        # Table is not a 2-D numpy matrix.
                        # Extract each column (don't know how to do this with
                        # slicing notation, e.g., data['varname'] =
                        # table[:][1:3]). Instead, loop over each parameter
                        # (pn) and aggregate columns. Then insert aggregated
                        # columns into N-D array 'data'.
                        for pn in range(0, len(cols)):
                            shape = np.append(len(data), psizes[pn])
                            for c in range(cols[pn][0], cols[pn][1] + 1):
                                if c == cols[pn][0]:  # New parameter
                                    tmp = table[table.dtype.names[c]]
                                else:  # Aggregate
                                    tmp = np.vstack((tmp, table[table.dtype.names[c]]))
                            tmp = np.squeeze(np.reshape(np.transpose(tmp), shape))
                            data[pnames[pn]] = tmp

                if opts['method'] == 'pandas' or opts['method'] == 'pandasnolength':
                    # If the requested method was pandas, use the
                    # pandasnolength method.

                    # Read file into Pandas DataFrame.
                    df = pandas.read_csv(fnamecsv, sep=',', header=None)

                    # Allocate output N-D array. (It is not possible to pass
                    # dtype=dt as computed to pandas.read_csv, so need to
                    # create a new N-D array.)
                    data = np.ndarray(shape=(len(df)), dtype=dt)

                    # Insert data from dataframe into N-D array.
                    for i in range(0, len(pnames)):
                        shape = np.append(len(data), psizes[i])
                        # In numpy 1.8.2 and Python 2.7, this throws an error
                        # for no apparent reason. Works as expected in numpy
                        # 1.10.4.
                        data[pnames[i]] = np.squeeze(
                            np.reshape(
                                df.values[:, np.arange(cols[i][0], cols[i][1] + 1)],
                                shape))

                # Any of the string parameters that do not have an associated
                # length in the metadata will have dtype='O' (object). These
                # parameters must be converted to have a dtype='SN', where N
                # is the maximum string length. N is determined automatically
                # when using astype('<S') (astype uses the largest N needed).
                dt2 = []  # Will have dtypes with string lengths calculated.
                for i in range(0, len(pnames)):
                    if data[pnames[i]].dtype == 'O':
                        dtype = (pnames[i],
                                 str(data[pnames[i]].astype('<S').dtype),
                                 psizes[i])
                    else:
                        dtype = dt[i]

                    # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version
                    if len(dtype) > 2 and dtype[2] == 1:
                        dtype = dtype[0:2]

                    dt2.append(dtype)

                # Create new N-D array that won't have any parameters with
                # type = 'O'.
                data2 = np.ndarray(data.shape, dt2)

                for i in range(0, len(pnames)):
                    if data[pnames[i]].dtype == 'O':
                        data2[pnames[i]] = data[pnames[i]].astype(dt2[i][1])
                    else:
                        data2[pnames[i]] = data[pnames[i]]
                        # Save memory by not copying (does this help?)
                        #data2[pnames[i]] = np.array(data[pnames[i]], copy=False)

                toc = time.time() - tic

        # Extra metadata associated with the request will be saved in a .pkl
        # file with the same base name as the .npy data file.
        meta.update({"x_server": SERVER})
        meta.update({"x_dataset": DATASET})
        meta.update({"x_parameters": PARAMETERS})
        meta.update({"x_time.min": START})
        meta.update({"x_time.max": STOP})
        meta.update({"x_requestDate": datetime.now().isoformat()[0:19]})
        meta.update({"x_cacheDir": urld})
        meta.update({"x_downloadTime": toc0})
        meta.update({"x_readTime": toc})
        meta.update({"x_metaFileParsed": fnamepkl})
        meta.update({"x_dataFileParsed": fnamenpy})
        meta.update({"x_metaFile": fnamejson})
        if opts['format'] == 'binary':
            meta.update({"x_dataFile": fnamebin})
        else:
            meta.update({"x_dataFile": fnamecsv})

        # Note that this should only technically be written if cache=True.
        # Will do this when output is
        #   h = hapi(...)
        #   h.data
        #   h.meta
        #   h.info

        # Create cache directory.
        if not os.path.exists(opts["cachedir"]):
            os.makedirs(opts["cachedir"])
        if not os.path.exists(urld):
            os.makedirs(urld)

        log('Writing %s' % fnamepklx, opts)
        f = open(fnamepklx, 'wb')
        pickle.dump(meta, f, protocol=2)
        f.close()

        if opts["cache"]:
            log('Writing %s' % fnamenpy, opts)
            if missing_length:
                np.save(fnamenpy, data2)
            else:
                np.save(fnamenpy, data)

        if missing_length:
            return data2, meta
        else:
            return data, meta
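
# A minimal sketch of the hapi() call signatures documented above. The server
# and dataset names repeat the docstring examples; a network connection is
# required, so this helper is illustrative rather than part of the API.
def _hapi_example():
    from hapiclient import hapi
    from hapiclient.hapi import hapitime2datetime

    server = 'http://hapi-server.org/servers/TestData2.0/hapi'

    servers = hapi()                     # All known HAPI server URLs
    catalog = hapi(server)               # Datasets at one server
    info = hapi(server, 'dataset1')      # Parameters in one dataset

    # Data records with start <= t < stop, plus subsetted metadata.
    data, meta = hapi(server, 'dataset1', 'scalar,vector',
                      '1970-01-01T00:00:00', '1970-01-02T00:00:00',
                      logging=True)
    Time = hapitime2datetime(data['Time'])  # Byte literals -> datetimes
    return data, meta, Time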
def hapiplot(*args, **kwargs):
    """Plot response from a HAPI server.

    Demos
    -----
    <https://github.com/hapi-server/client-python/blob/master/hapiclient/plot/hapiplot_test.py>

    Usage
    -----
        data, meta = hapiplot(server, dataset, params, start, stop, **kwargs)
    or
        meta = hapiplot(data, meta, **kwargs)
    where data and meta are return values from `hapi()`.

    All parameters are plotted. If a parameter has a bins attribute, it is
    plotted using `heatmap()`. Otherwise, it is plotted using `timeseries()`.

    Returns
    -------
    `data` is the same as that returned from `hapi()`.
    `meta` is the same as that returned from `hapi()` with the addition of

        meta['parameters'][i]['hapiplot']['figure'] is a reference to the
        figure (e.g., plt.gcf()). Usage example:

        >>> fig = meta['parameters'][i]['hapiplot']['figure']
        >>> fig.set_facecolor('blue')
        >>> fig.axes[0].set_ylabel('new y-label')
        >>> fig.axes[0].set_title('new title\\nsubtitle\\nsubtitle')
        >>> fig.tight_layout()

        meta['parameters'][i]['hapiplot']['colorbar'] is a reference to the
        colorbar on the figure (if the parameter was plotted as a heatmap).

        meta['parameters'][i]['hapiplot']['image'] is PNG, PDF, or SVG data
        and is included only if `returnimage=True`. Usage example:

        >>> img = meta['parameters'][i]['hapiplot']['image']
        >>> Image.open(io.BytesIO(img)).show()
        >>> # or
        >>> f = open('/tmp/a.png', 'wb')
        >>> f.write(img)
        >>> f.close()

    See Also
    --------
    hapi: Get data from a HAPI server
    timeseries: Used by `hapiplot()` to plot HAPI parameters with no `bins`
    heatmap: Used by `hapiplot()` to plot HAPI parameters with `bins`

    <https://github.com/hapi-server/client-python-notebooks>

    kwargs
    ------
    * logging: [False] Display console messages
    * usecache: [True] Use cached data
    * tsopts: {} kwargs for the `timeseries()` function
    * hmopts: {} kwargs for the `heatmap()` function

    Other kwargs
    ------------
    * returnimage: [False] If True, `hapiplot()` returns binary image data
    * returnformat: [png], svg, or pdf
    * cachedir: Directory to store images. Default is
      hapiclient.hapi.cachedir()
    * useimagecache: [True] Use cached image (when returnimage=True)
    * saveimage: [False] Save image to `cachedir`
    * saveformat: [png], svg, or pdf

    Example
    -------
    >>> server = 'http://hapi-server.org/servers/TestData/hapi'
    >>> dataset = 'dataset1'
    >>> start = '1970-01-01T00:00:00'
    >>> stop = '1970-01-02T00:00:00'
    >>> params = 'scalar,vector'
    >>> opts = {'logging': True}
    >>>
    >>> from hapiclient import hapiplot
    >>> hapiplot(server, dataset, params, start, stop, **opts)
    >>>
    >>> # or
    >>>
    >>> from hapiclient import hapi, hapiplot
    >>> data, meta = hapi(server, dataset, params, start, stop, **opts)
    >>> hapiplot(data, meta, **opts)
    """

    if len(args) == 5:
        # For consistency with gallery and autoplot functions, allow usage of
        #   hapiplot(server, dataset, parameters, start, stop, **kwargs)
        from hapiclient.hapi import hapiopts
        from hapiclient.hapi import hapi
        kwargs_allowed = hapiopts()
        kwargs_reduced = {}
        # Extract hapi() options from kwargs
        for key, value in kwargs.items():
            if key in kwargs_allowed:
                kwargs_reduced[key] = value
        data, meta = hapi(args[0], args[1], args[2], args[3], args[4],
                          **kwargs_reduced)
        meta = hapiplot(data, meta, **kwargs)
        return data, meta
    else:
        data = args[0]
        meta = args[1]

    # Default options
    opts = {
                'logging': False,
                'saveimage': False,
                'returnimage': False,
                'usecache': True,
                'useimagecache': True,
                'cachedir': cachedir(),
                'backend': 'default',
                'style': 'fast',
                'title': '',
                'ztitle': '',
                'xlabel': '',
                'ylabel': '',
                'zlabel': '',
                'logx': False,
                'logy': False,
                'logz': False,
                'tsopts': {},
                'hmopts': {},
                'rcParams':
                    {
                        'savefig.dpi': 144,
                        'savefig.format': 'png',
                        'savefig.bbox': 'tight',
                        'savefig.transparent': False,
                        'figure.max_open_warning': 50,
                        'figure.figsize': (7, 3),
                        'figure.dpi': 144,
                        'axes.titlesize': 10,
                        "font.family": "serif",
                        "font.serif": rcParams['font.serif'],
                        "font.weight": "normal"
                    },
                '_rcParams': {'figure.bbox': 'standard'}
           }

    # Override defaults
    opts = setopts(opts, kwargs)

    from hapiclient import __version__
    log('Running hapiplot.py version %s' % __version__, opts)

    # _rcParams are not actually rcParams:
    #   'figure.bbox': 'standard'
    # Set to 'tight' to have fig.tight_layout() called before the figure is
    # shown.

    if opts["saveimage"]:
        # Create cache directory
        dir = cachedir(opts['cachedir'], meta['x_server'])
        if not os.path.exists(dir):
            os.makedirs(dir)

    # Convert from NumPy array of byte literals to NumPy array of
    # datetime objects.
    timename = meta['parameters'][0]['name']
    Time = hapitime2datetime(data[timename])

    if len(meta["parameters"]) == 1:
        a = 0  # Time is the only parameter
    else:
        a = 1  # Time plus another parameter

    for i in range(a, len(meta["parameters"])):

        meta["parameters"][i]['hapiplot'] = {}

        name = meta["parameters"][i]["name"]

        # Return cached image (case where we are returning binary image
        # data). imagepath() options: only need the filename under these
        # conditions.
        if opts['saveimage'] or (opts['returnimage'] and opts['useimagecache']):
            # Will use the given rc style parameters and style name to
            # generate the file name. Assumes rc parameters of style and
            # hapiplot defaults never change.
            styleParams = {}
            fmt = opts['rcParams']['savefig.format']
            if 'rcParams' in kwargs:
                styleParams = kwargs['rcParams']
                if 'savefig.format' in kwargs['rcParams']:
                    fmt = kwargs['rcParams']['savefig.format']

            fnameimg = imagepath(meta, i, opts['cachedir'], styleParams, fmt)

        if opts['useimagecache'] and opts['returnimage'] \
                and os.path.isfile(fnameimg):
            log('Returning cached binary image data in ' + fnameimg, opts)
            meta["parameters"][i]['hapiplot']['imagefile'] = fnameimg
            with open(fnameimg, "rb") as f:
                meta["parameters"][i]['hapiplot']['image'] = f.read()
            continue

        log("Plotting parameter '%s'" % name, opts)

        if len(data[name].shape) > 3:
            # TODO: Implement more than 2 dimensions?
            warning('Parameter ' + name + ' has size with more than 2 '
                    'dimensions. Plotting first two only.')
            continue

        # If the parameter has a size with two elements, e.g., [N1, N2],
        # create N2 plots.
        if len(data[name].shape) == 3:  # shape = (Time, N1, N2)
            nplts = data[name].shape[1]
            if opts['returnimage']:
                warning('Only returning first image for parameter with '
                        'size[1] > 1.')
                nplts = 1
            for j in range(nplts):
                timename = meta['parameters'][0]['name']
                # Name to indicate what is plotted
                name_new = name + "[:," + str(j) + "]"
                # Reduced data N-D array
                datar = np.ndarray(shape=(data[name].shape[0]),
                                   dtype=[
                                       (timename, data.dtype[timename]),
                                       (name_new, data[name].dtype.str,
                                        data.dtype[name].shape[1])
                                   ])
                datar[timename] = data[timename]
                datar[name_new] = data[name][:, j]

                # Copy metadata to create a reduced metadata object
                metar = meta.copy()  # Shallow copy
                metar["parameters"] = []
                # Create parameters array with elements of Time parameter ...
                metar["parameters"].append(meta["parameters"][0])
                # ... and this parameter.
                metar["parameters"].append(meta["parameters"][i].copy())
                # Give a new name to indicate it is a subset of the full
                # parameter.
                metar["parameters"][1]['name'] = name_new
                metar["parameters"][1]['name_orig'] = name
                # New size is N1
                metar["parameters"][1]['size'] = [meta["parameters"][i]['size'][1]]

                if 'units' in metar["parameters"][1]:
                    if type(meta["parameters"][i]['units']) == str \
                            or meta["parameters"][i]['units'] == None:
                        # Same units apply to all dimensions
                        metar["parameters"][1]["units"] = meta["parameters"][i]['units']
                    else:
                        metar["parameters"][1]["units"] = meta["parameters"][i]['units'][j]

                if 'label' in metar["parameters"][1]:
                    if type(meta["parameters"][i]['label']) == str:
                        # Same label applies to all dimensions
                        metar["parameters"][1]["label"] = meta["parameters"][i]['label']
                    else:
                        metar["parameters"][1]["label"] = meta["parameters"][i]['label'][j]

                # Extract bins corresponding to jth column of data[name]
                if 'bins' in metar["parameters"][1]:
                    metar["parameters"][1]['bins'] = []
                    metar["parameters"][1]['bins'].append(meta["parameters"][i]['bins'][j])

                # rcParams is modified by setopts to have all rcParams.
                # Reset to the originally passed rcParams so that imagepath
                # computes the file name based on the rcParams passed to
                # hapiplot.
                if 'rcParams' in kwargs:
                    opts['rcParams'] = kwargs['rcParams']

                metar = hapiplot(datar, metar, **opts)
                # The reduced metadata has the plotted parameter at index 1.
                meta["parameters"][i]['hapiplot'] = metar["parameters"][1]['hapiplot']

            return meta

        if 'name_orig' in meta["parameters"][i]:
            title = meta["x_server"] + "\n" + meta["x_dataset"] + " | " \
                    + meta["parameters"][i]['name_orig']
        else:
            title = meta["x_server"] + "\n" + meta["x_dataset"] + " | " + name

        as_heatmap = False
        if 'size' in meta['parameters'][i] and meta['parameters'][i]['size'][0] > 10:
            as_heatmap = True
        if 'bins' in meta['parameters'][i]:
            as_heatmap = True

        if 'units' in meta["parameters"][i] \
                and type(meta["parameters"][i]["units"]) == list:
            if as_heatmap:
                warning("Not plotting %s as heatmap because components have "
                        "different units." % meta["parameters"][i]["name"])
            as_heatmap = False

        if as_heatmap:
            # Plot as heatmap

            hmopts = {
                        'returnimage': opts['returnimage'],
                        'transparent': opts['rcParams']['savefig.transparent']
                     }

            if meta["parameters"][i]["type"] == "string":
                warning("Plots for only types double, integer, and isotime "
                        "implemented. Not plotting %s."
                        % meta["parameters"][i]["name"])
                continue

            z = np.asarray(data[name])

            if 'fill' in meta["parameters"][i] and meta["parameters"][i]['fill']:
                if meta["parameters"][i]["type"] == 'integer':
                    z = z.astype('<f8', copy=False)
                z = fill2nan(z, meta["parameters"][i]['fill'])

            if 'bins' in meta['parameters'][i]:
                ylabel = meta["parameters"][i]['bins'][0]["name"] + " [" \
                         + meta["parameters"][i]['bins'][0]["units"] + "]"
            else:
                ylabel = "col %d" % i

            units = meta["parameters"][i]["units"]
            nl = ""
            if len(name) + len(units) > 30:
                nl = "\n"

            zlabel = name + nl + " [" + units + "]"

            if 'bins' in meta['parameters'][i]:
                if 'ranges' in meta["parameters"][i]['bins'][0]:
                    bins = np.array(meta["parameters"][i]['bins'][0]["ranges"])
                else:
                    bins = np.array(meta["parameters"][i]['bins'][0]["centers"])
            else:
                bins = np.arange(meta['parameters'][i]['size'][0])

            dt = np.diff(Time)
            dtu = np.unique(dt)
            if len(dtu) > 1:
                #warning('Time values are not uniformly spaced. Bin width for '
                #        'time will be based on time separation of '
                #        'consecutive time values.')
                if False and 'cadence' in meta:
                    # Cadence != time bin width in general, so don't do this.
                    # See https://github.com/hapi-server/data-specification/issues/75
                    # Kept for future reference when Parameter.bin.window or
                    # Parameter.bin.windowWidth is added to the spec.
                    import isodate
                    dt = isodate.parse_duration(meta['cadence'])

                if 'timeStampLocation' in meta:
                    if meta['timeStampLocation'].lower() == "begin":
                        Time = np.vstack((Time, Time + dt))
                    if meta['timeStampLocation'].lower() == "end":
                        Time = np.vstack((Time - dt, Time))
                    if meta['timeStampLocation'].lower() == "center":
                        Time = np.vstack((Time - dt/2, Time + dt/2))
                else:
                    # Default is center
                    Time = np.vstack((Time - dt/2, Time + dt/2))

                Time = np.transpose(Time)
            elif 'timeStampLocation' in meta:
                if meta['timeStampLocation'].lower() == "begin":
                    Time = np.append(Time, Time[-1] + dtu[0])
                if meta['timeStampLocation'].lower() == "end":
                    Time = Time - dtu[0]
                    Time = np.append(Time, Time[-1] + dtu[0])

            if opts['xlabel'] != '' and 'xlabel' not in opts['hmopts']:
                hmopts['xlabel'] = opts['xlabel']

            hmopts['ylabel'] = ylabel
            if opts['ylabel'] != '' and 'ylabel' not in opts['hmopts']:
                hmopts['ylabel'] = opts['ylabel']

            hmopts['title'] = title
            if opts['title'] != '' and 'title' not in opts['hmopts']:
                hmopts['title'] = opts['title']

            hmopts['zlabel'] = zlabel
            if opts['zlabel'] != '' and 'zlabel' not in opts['hmopts']:
                hmopts['zlabel'] = opts['zlabel']

            if False:  # Disabled; ztitle is not yet computed.
                hmopts['ztitle'] = ztitle
                if opts['ztitle'] != '' and 'ztitle' not in opts['hmopts']:
                    hmopts['ztitle'] = opts['ztitle']

            if opts['logx'] is not False:
                hmopts['logx'] = True
            if opts['logy'] is not False:
                hmopts['logy'] = True
            if opts['logz'] is not False:
                hmopts['logz'] = True

            for key, value in opts['hmopts'].items():
                hmopts[key] = value

            with rc_context(rc=opts['rcParams']):
                fig, cb = heatmap(Time, bins, np.transpose(z), **hmopts)

            meta["parameters"][i]['hapiplot']['figure'] = fig
            meta["parameters"][i]['hapiplot']['colorbar'] = cb

        else:
            # Plot as timeseries

            tsopts = {
                        'logging': opts['logging'],
                        'returnimage': opts['returnimage'],
                        'transparent': opts['rcParams']['savefig.transparent']
                     }

            ptype = meta["parameters"][i]["type"]
            if ptype == "isotime":
                y = hapitime2datetime(data[name])
            elif ptype == 'string':
                y = data[name].astype('U')
            else:
                y = np.asarray(data[name])

            if 'fill' in meta["parameters"][i] and meta["parameters"][i]['fill']:
                if ptype == 'isotime' or ptype == 'string':
                    Igood = y != meta["parameters"][i]['fill']
                    # Note that json reader returns fill to U not b.
                    Nremoved = data[name].size - np.sum(Igood)
                    if Nremoved > 0:
                        # TODO: Implement masking so connected line plots will
                        # show gaps as they do for NaN values.
                        warning('Parameter ' + name + ' is of type ' + ptype
                                + ' and has ' + str(Nremoved)
                                + ' fill value(s). Masking is not implemented, '
                                'so removing fill elements before plotting.')
                        Time = Time[Igood]
                        y = y[Igood]
                if ptype == 'integer':
                    y = y.astype('<f8', copy=False)
                if ptype == 'integer' or ptype == 'double':
                    y = fill2nan(y, meta["parameters"][i]['fill'])

            units = None
            if 'units' in meta["parameters"][i] and meta["parameters"][i]['units']:
                units = meta["parameters"][i]["units"]

            nl = ""
            if type(units) == str:
                if len(name) + len(units) > 30:
                    # TODO: Automatically figure out when this is needed.
                    nl = "\n"

            ylabel = name
            if units is not None and type(units) is not list:
                ylabel = name + nl + " [" + units + "]"

            if not 'legendlabels' in opts['tsopts']:
                legendlabels = []
                if 'size' in meta['parameters'][i]:
                    for l in range(0, meta['parameters'][i]['size'][0]):
                        bin_label = ''
                        bin_name = ''
                        col_name = ''
                        if 'bins' in meta['parameters'][i]:
                            bin_name = meta['parameters'][i]['bins'][0]['name']
                            if 'label' in meta['parameters'][i]['bins'][0]:
                                if type(meta['parameters'][i]['bins'][0]['label']) == str:
                                    bin_name = meta['parameters'][i]['bins'][0]['label']
                                else:
                                    bin_name = meta['parameters'][i]['bins'][0]['label'][l]

                            sep = ''
                            if 'centers' in meta['parameters'][i]['bins'][0] \
                                    and 'ranges' in meta['parameters'][i]['bins'][0]:
                                bin_name = bin_name + ' bin with'
                                sep = ';'

                            bin_label = ''
                            if 'units' in meta['parameters'][i]['bins'][0]:
                                bin_units = meta['parameters'][i]['bins'][0]['units']
                                if type(bin_units) == list:
                                    if type(bin_units[l]) == str:
                                        bin_units = ' [' + bin_units[l] + ']'
                                    elif bin_units[l] == None:
                                        bin_units = ' []'
                                    else:
                                        bin_units = ''
                                else:
                                    if type(bin_units) == str:
                                        bin_units = ' [' + bin_units + ']'
                                    else:
                                        bin_units = ''

                            if 'centers' in meta['parameters'][i]['bins'][0]:
                                if meta['parameters'][i]['bins'][0]['centers'][l] is not None:
                                    bin_label = bin_label + ' center = ' \
                                        + str(meta['parameters'][i]['bins'][0]['centers'][l]) \
                                        + bin_units
                                else:
                                    bin_label = bin_label + ' center = None'

                            if 'ranges' in meta['parameters'][i]['bins'][0]:
                                if type(meta['parameters'][i]['bins'][0]['ranges'][l]) == list:
                                    bin_label = bin_label + sep + ' range = [' \
                                        + str(meta['parameters'][i]['bins'][0]['ranges'][l][0]) \
                                        + ', ' \
                                        + str(meta['parameters'][i]['bins'][0]['ranges'][l][1]) \
                                        + ']' + bin_units
                                else:
                                    bin_label = bin_label + sep + ' range = [None]'

                            if bin_label != '':
                                bin_label = 'bin:' + bin_label

                            col_name = bin_name + '#%d' % l

                        if col_name == '':
                            col_name = 'col #%d' % l

                        if 'label' in meta['parameters'][i]:
                            if type(meta['parameters'][i]['label']) == list:
                                col_name = meta['parameters'][i]['label'][l]

                        if type(units) == list:
                            if len(units) == 1:
                                legendlabels.append(col_name + ' [' + units[0] + '] ' + bin_label)
                            elif type(units[l]) == str:
                                legendlabels.append(col_name + ' [' + units[l] + '] ' + bin_label)
                            elif units[l] == None:
                                legendlabels.append(col_name + ' [] ' + bin_label)
                            else:
                                legendlabels.append(col_name + ' ' + bin_label)
                        else:
                            # Units are on y label
                            legendlabels.append(col_name + ' ' + bin_label)

                tsopts['legendlabels'] = legendlabels

            # If xlabel in opts and opts['tsopts'], warn?
            if opts['xlabel'] != '' and 'xlabel' not in opts['tsopts']:
                tsopts['xlabel'] = opts['xlabel']

            tsopts['ylabel'] = ylabel
            if opts['ylabel'] != '' and 'ylabel' not in opts['tsopts']:
                tsopts['ylabel'] = opts['ylabel']

            tsopts['title'] = title
            if opts['title'] != '' and 'title' not in opts['tsopts']:
                tsopts['title'] = opts['title']

            if opts['logx'] is not False and 'logx' not in opts['tsopts']:
                tsopts['logx'] = True
            if opts['logy'] is not False and 'logy' not in opts['tsopts']:
                tsopts['logy'] = True

            # Apply tsopts
            for key, value in opts['tsopts'].items():
                tsopts[key] = value

            with rc_context(rc=opts['rcParams']):
                fig = timeseries(Time, y, **tsopts)

            meta["parameters"][i]['hapiplot']['figure'] = fig

        if opts['saveimage']:
            log('Writing %s' % fnameimg, opts)
            meta["parameters"][i]['hapiplot']['imagefile'] = fnameimg
        else:
            from io import BytesIO
            fnameimg = BytesIO()

        if opts['returnimage']:
            with rc_context(rc=opts['rcParams']):
                fig.canvas.print_figure(fnameimg)

            if opts['saveimage']:
                with open(fnameimg, mode='rb') as f:
                    meta["parameters"][i]['hapiplot']['image'] = f.read()
            else:
                meta["parameters"][i]['hapiplot']['image'] = fnameimg.getvalue()
        else:
            with rc_context(rc=opts['rcParams']):
                fig.savefig(fnameimg)

            # Two calls to fig.tight_layout() may be needed b/c of bug in
            # PyQt: https://github.com/matplotlib/matplotlib/issues/10361
            if opts['_rcParams']['figure.bbox'] == 'tight':
                fig.tight_layout()

    return meta
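
# A minimal sketch of the two hapiplot() call styles from the docstring. The
# server and parameter names repeat the docstring example; matplotlib and a
# network connection are required.
def _hapiplot_example():
    from hapiclient import hapi, hapiplot

    server = 'http://hapi-server.org/servers/TestData/hapi'
    data, meta = hapi(server, 'dataset1', 'scalar,vector',
                      '1970-01-01T00:00:00', '1970-01-02T00:00:00')

    # Plot previously fetched data; figure handles are attached to the
    # returned metadata under meta['parameters'][i]['hapiplot'].
    meta = hapiplot(data, meta, logging=True)
    fig = meta['parameters'][1]['hapiplot']['figure']
    return fig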