def do_fetch(self):
    channel_length = int(self.length)
    outdata = np.zeros(1024 * 2 * 256 + 1)
    ## !! really should put a wrapper around gethjdata to do common stuff
    # outfile is only needed if the direct passing of binary won't work
    # with tempfile.NamedTemporaryFile(prefix="pyfusion_") as outfile:
    ierror, getrets = gethjdata.gethjdata(self.shot, channel_length, self.path,
                                          verbose=VERBOSE, opt=1, ierror=2,
                                          outdata=outdata, outname='')
    if ierror != 0:
        raise LookupError('hj Okada style data not found for {s}:{c}'
                          .format(s=self.shot, c=self.path))

    ch = Channel(self.path, Coords('dummy', (0, 0, 0)))

    # the intent statement causes the out var to be returned in the result list
    # looks like the time,data is interleaved in a 1x256 array
    # it is fed in as real*64, but returns as real*32! (as per fortran decl)
    debug_(pyfusion.DEBUG, 4, key='Heliotron_fetch', msg='after call to getdata')
    # timebase in secs (ms in raw data) - could add a preferred unit?
    # this is partly allowed for in savez_compressed, newload, and
    # for plotting, in the config file.
    # important that the 1e-3 be inside the Timebase()
    output_data = TimeseriesData(timebase=Timebase(1e-3 * getrets[1::2]),
                                 signal=Signal(getrets[2::2]), channels=ch)
    output_data.meta.update({'shot': self.shot})
    if pyfusion.VERBOSE > 0:
        print('HJ config name', self.config_name)
    output_data.config_name = self.config_name
    stprms = get_static_params(shot=self.shot, signal=self.path)
    if len(list(stprms)) == 0:  # maybe this should be ignored - how do we use it?
        raise LookupError(' failure to get params for {shot}:{path}'
                          .format(shot=self.shot, path=self.path))
    output_data.params = stprms
    return output_data
def do_fetch(self):
    channel_length = int(self.length)
    outdata = np.zeros(1024 * 2 * 256 + 1)
    ## !! really should put a wrapper around gethjdata to do common stuff
    # outfile is only needed if the direct passing of binary won't work
    # with tempfile.NamedTemporaryFile(prefix="pyfusion_") as outfile:
    # get in two steps to make debugging easier
    allrets = gethjdata.gethjdata(self.shot, channel_length, self.path,
                                  verbose=VERBOSE, opt=1, ierror=2,
                                  isample=-1, outdata=outdata, outname='')
    ierror, isample, getrets = allrets
    if ierror != 0:
        raise LookupError('hj Okada style data not found for {s}:{c}'
                          .format(s=self.shot, c=self.path))

    ch = Channel(self.path, Coords('dummy', (0, 0, 0)))

    # the intent statement causes the out var to be returned in the result list
    # looks like the time,data is interleaved in a 1x256 array
    # it is fed in as real*64, but returns as real*32! (as per fortran decl)
    debug_(pyfusion.DEBUG, 4, key='Heliotron_fetch', msg='after call to getdata')
    # timebase in secs (ms in raw data) - could add a preferred unit?
    # this is partly allowed for in savez_compressed, newload, and
    # for plotting, in the config file.
    # important that the 1e-3 be inside the Timebase()
    output_data = TimeseriesData(timebase=Timebase(1e-3 * getrets[1::2][0:isample]),
                                 signal=Signal(getrets[2::2][0:isample]),
                                 channels=ch)
    output_data.meta.update({'shot': self.shot})
    if pyfusion.VERBOSE > 0:
        print('HJ config name', self.config_name)
    output_data.config_name = self.config_name
    stprms = get_static_params(shot=self.shot, signal=self.path)
    if len(list(stprms)) == 0:  # maybe this should be ignored - how do we use it?
        raise LookupError(' failure to get params for {shot}:{path}'
                          .format(shot=self.shot, path=self.path))
    output_data.params = stprms
    return output_data
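
# A minimal sketch (not part of the original source) of the wrapper that the
# "!! really should put a wrapper around gethjdata" comment above asks for.
# It assumes the same (ierror, isample, outdata) calling convention as the
# newer do_fetch() above, plus the module-level np and gethjdata imports; the
# helper name and buffer size are illustrative only.
def call_gethjdata(shot, channel_length, path, verbose=0):
    """Hypothetical helper: allocate the output buffer, call the extension,
    check ierror and return (timebase_sec, signal) for the valid samples."""
    outdata = np.zeros(1024 * 2 * 256 + 1)
    ierror, isample, getrets = gethjdata.gethjdata(shot, channel_length, path,
                                                   verbose=verbose, opt=1,
                                                   ierror=2, isample=-1,
                                                   outdata=outdata, outname='')
    if ierror != 0:
        raise LookupError('hj Okada style data not found for {s}:{c}'
                          .format(s=shot, c=path))
    # time (ms) and data are interleaved, starting at indices 1 and 2
    return 1e-3 * getrets[1::2][0:isample], getrets[2::2][0:isample]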
def do_fetch(self):
    # my W7X shots are of the form from_utc, to_utc
    # or date (8dig) and shot (progId)
    # the format is in the acquisition properties, to avoid
    # repetition in each individual diagnostic
    if self.shot[1] > 1e9:  # we have start and end in UTC
        f, t = self.shot
    else:
        f, t = get_shot_utc(*self.shot)
    # A URL STYLE diagnostic - used for a one-off
    # this could be moved to setup so that the error info is more complete
    if hasattr(self, 'url'):
        fmt = self.url + '_signal.json?from={shot_f}&upto={shot_t}'
        fmt = self.url + '_signal.json?from={shot_f}&upto={shot_t}&nSamples=200000'
        params = {}
    else:  # a pattern-based one - used for arrays of probes
        if hasattr(self, 'fmt'):  # does the diagnostic have one?
            fmt = self.fmt
        elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
            fmt = self.acq.fmt
        else:  # so far we have no quick way to check the server is online
            raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                              'edited because the url is not available')

        params = eval('dict(' + self.params + ')')

    if 'upto' not in fmt:
        fmt += '_signal.json?from={shot_f}&upto={shot_t}'

    if ('nSamples' not in fmt) and (pyfusion.NSAMPLES != 0):
        fmt += '&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

    params.update(shot_f=f, shot_t=t)
    url = fmt.format(**params)
    if pyfusion.CACHE:
        print('using wget on {url}'.format(url=url))
        os.system('wget -x "{url}"'.format(url=url))
        # now read from the local copy - it is in the wd, so only //
        # but it seems we need the full path for now
        url = url.replace('http://', 'file:///home/bdb112/pyfusion/working/pyfusion/')
        print('now trying the cached copy we just grabbed: {url}'.format(url=url))
    if pyfusion.VERBOSE > 0:
        print('===> fetching url {u}'.format(u=url))

    # seems to take twice as long as timeout requested.
    # haven't thought about python3 for the json stuff yet
    try:
        # dat = json.load(urlopen(url,timeout=pyfusion.TIMEOUT)) works
        # but follow example in
        # http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python,
        dat = json.loads(urlopen(url, timeout=pyfusion.TIMEOUT).read().decode())
    except socket.timeout:
        # should check if this is better tested by the URL module
        print('****** first timeout error *****')
        dat = json.load(urlopen(url, timeout=3 * pyfusion.TIMEOUT))
    except Exception as reason:
        if pyfusion.VERBOSE:
            print('********Exception***** on {c}: {u} \n{r}'
                  .format(c=self.config_name, u=url, r=reason))
        raise

    # this form will default to repair = 2 for all LP probes.
    default_repair = 2 if 'Desc.82/' in url else 0
    # this form follows the config file settings
    self.repair = int(self.repair) if hasattr(self, 'repair') else default_repair
    dimraw = np.array(dat['dimensions'])
    dim = dimraw - dimraw[0]
    if self.repair == 0:
        pass  # leave as is
    # need at least this clipping for Langmuir probes in Op1.1
    elif self.repair == 1:
        dim = np.clip(dim, 0, 1e99)
    elif self.repair == 2:
        dim, msg = regenerate_dim(dim)
        if msg is not None:
            print('shot {s}, {c}: {m}'
                  .format(s=self.shot, c=self.config_name, m=msg))
    else:
        raise ValueError('repair value of {r} not understood'.format(r=self.repair))

    if pyfusion.VERBOSE > 2:
        print('repair', self.repair)
    # ch = Channel(self.config_name, Coords('dummy', (0,0,0)))
    # this probably should be in base.py
    coords = get_coords_for_channel(**self.__dict__)
    # used to be bare_chan? should we include - signs?
    ch = Channel(self.config_name, coords)
    output_data = TimeseriesData(timebase=Timebase(1e-9 * dim),
                                 signal=Signal(dat['values']), channels=ch)
    output_data.meta.update({'shot': self.shot})
    output_data.utc = [dat['dimensions'][0], dat['dimensions'][-1]]
    output_data.units = dat['units'] if 'units' in dat else ''
    # this is a minor duplication - at least it gets saved via params
    params['data_utc'] = output_data.utc
    # Warning - this could slow things down! - but allows corrupted time
    # to be re-calculated as algorithms improve.
    params['diff_dimraw'] = dimraw
    params['diff_dimraw'][1:] = np.diff(dimraw)
    params['pyfusion_version'] = pyfusion.version.get_version()
    if pyfusion.VERBOSE > 0:
        print('shot {s}, config name {c}'
              .format(c=self.config_name, s=self.shot))

    output_data.config_name = self.config_name
    debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
    output_data.params = params

    ### the total shot utc.
    output_data.utc = [f, t]

    return output_data
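
# A minimal sketch (not part of the original source) of how the fmt/params
# mechanism above builds the ArchiveDB REST request: the diagnostic supplies a
# format pattern and per-channel parameters, and do_fetch() appends the
# _signal.json query and substitutes the utc range.  The station/stream/channel
# numbers are copied from an example URL quoted in the comments further down
# and are illustrative only.
def _demo_build_url():
    # pattern as it might appear in pyfusion.cfg (ends at the 'scaled' node)
    fmt = ('http://archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/'
           'CoDaStationDesc.82/DataModuleDesc.181_DATASTREAM/{ch}/Channel_{ch}/scaled/')
    if 'upto' not in fmt:                       # same completion step as do_fetch()
        fmt += '_signal.json?from={shot_f}&upto={shot_t}'
    params = dict(ch=7)                         # per-channel params from the config
    params.update(shot_f=1457626020000000000,   # example utc pair in nanoseconds
                  shot_t=1457626080000000000)
    return fmt.format(**params)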
def try_fetch_local(input_data, bare_chan):
    """ return data if in the local cache, otherwise None
    doesn't work for single channel HJ data.
    sgn (not gain) should only be used at the single channel base/fetch level
    """
    for each_path in pyfusion.config.get('global', 'localdatapath').split('+'):
        # check for a multi-valued shot number, e.g. utc bounds for W7-X data
        shot = input_data.shot
        # MDSplus style path to access files sorted into folders by shot
        path, patt = os.path.split(each_path)
        # print(patt)
        if len(patt) == 2 * len(patt.replace('~', '')):  # a subdir code based on date
            subdir = ''
            # reverse the order of both the pattern and the shot so a posn is 0th char
            strshot = str(shot[0]) if len(np.shape(shot)) > 0 else str(shot)
            revshot = strshot[::-1]
            for i, ch in enumerate(patt):
                if (i % 2) == 0:
                    if ch != '~':
                        raise LookupError("Can't parse {d} as a MDS style subdir"
                                          .format(d=patt))
                    continue
                subdir += revshot[ord(ch) - ord('a')]
        else:
            subdir = patt
        debug_(pyfusion.DEBUG, 3, key='MDS style subdir', msg=each_path)
        each_path = os.path.join(path, subdir)
        if isinstance(shot, (tuple, list, ndarray)):
            shot_str = '{s0}_{s1}'.format(s0=shot[0], s1=shot[1])
        else:
            shot_str = str(shot)
        input_data.localname = os.path.join(each_path, '{shot}_{bc}.npz'
                                            .format(shot=shot_str, bc=bare_chan))
        # original - data_filename %filename_dict)
        if pyfusion.VERBOSE > 2:
            print(each_path, input_data.localname)
        files_exist = os.path.exists(input_data.localname)
        debug_(pyfusion.DEBUG, 3, key='try_local_fetch')
        if files_exist:
            intmp = np.any([st in input_data.localname.lower()
                            for st in ['tmp', 'temp']])  # add anything you wish to warn about
            if pyfusion.VERBOSE > 0 or intmp:
                if intmp:
                    pyfusion.logging.warning('Using {f} in temporary directory!'
                                             .format(f=input_data.localname))
                print('found local data in {f}'.format(f=input_data.localname))
            break

    if not files_exist:
        return None

    signal_dict = newload(input_data.localname)
    if ('params' in signal_dict and 'name' in signal_dict['params']
            and 'W7X_L5' in signal_dict['params']['name']):
        if signal_dict['params']['pyfusion_version'] < '0.6.8b':
            raise ValueError('probe assignments in error LP11-22 in {fn}'
                             .format(fn=input_data.localname))

    if np.nanmax(signal_dict['timebase']) == 0:
        pyfusion.logging.warning('making a fake timebase for {fn}'
                                 .format(fn=input_data.localname))
        signal_dict['timebase'] = 2e-6 * np.cumsum(1.0 + 0 * signal_dict['signal'])

    coords = get_coords_for_channel(**input_data.__dict__)
    # ch = Channel(bare_chan, Coords('dummy', (0,0,0)))
    ch = Channel(bare_chan, coords)
    output_data = TimeseriesData(timebase=Timebase(signal_dict['timebase']),
                                 signal=Signal(signal_dict['signal']), channels=ch)
    # bdb - used "fetcher" instead of "self" in the "direct from LHD data" version
    # when using saved files, should use the name - not input_data.config_name
    # it WAS the config_name coming from the raw format.
    output_data.config_name = bare_chan
    # would be nice to get to the gain here - but how - maybe setup will get it
    output_data.meta.update({'shot': input_data.shot})
    if 'params' in signal_dict:
        output_data.params = signal_dict['params']
        if 'utc' in signal_dict['params']:
            output_data.utc = signal_dict['params'].get('utc', None)
    else:
        # yes, it seems like duplication, but no
        output_data.utc = None
        output_data.params = dict(comment='old npz file has no params')

    oldsrc = (', originally from ' + output_data.params['source']
              if 'source' in output_data.params else '')
    output_data.params.update(dict(source='from npz cache' + oldsrc))
    return output_data
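
# A standalone sketch (not part of the original source) of the '~'-coded
# MDSplus-style subdirectory scheme decoded in try_fetch_local() above: even
# positions in the pattern must be '~', and each following letter selects a
# digit of the reversed shot number ('a' = last digit, 'b' = second last, ...).
# The pattern and shot used here are illustrative only.
def _demo_decode_subdir(patt='~h~g~f~e~d~c', shot=20160310):
    revshot = str(shot)[::-1]
    subdir = ''
    for i, ch in enumerate(patt):
        if (i % 2) == 0:
            if ch != '~':
                raise LookupError("Can't parse {d} as a MDS style subdir".format(d=patt))
            continue
        subdir += revshot[ord(ch) - ord('a')]
    return subdir  # e.g. '201603' - i.e. files sorted into year/month folders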
def read_data_from_file(fetcher):
    prm_dict = read_prm_file(fetcher.basename + ".prm")
    bytes = int(prm_dict['DataLength(byte)'][0])
    bits = int(prm_dict['Resolution(bit)'][0])
    if 'ImageType' not in prm_dict:  # if so, assume unsigned
        bytes_per_sample = 2
        dat_arr = Array.array('H')
        offset = 2 ** (bits - 1)
        dtyp = np.dtype('uint16')
    else:
        if prm_dict['ImageType'][0] == 'INT16':
            bytes_per_sample = 2
            if prm_dict['BinaryCoding'][0] == 'offset_binary':
                dat_arr = Array.array('H')
                offset = 2 ** (bits - 1)
                dtyp = np.dtype('uint16')
            elif prm_dict['BinaryCoding'][0] == "shifted_2's_complementary":
                dat_arr = Array.array('h')
                offset = 0
                dtyp = np.dtype('int16')
            # this was added for the VSL digitisers
            elif prm_dict['BinaryCoding'][0] == "2's_complementary":  # not sure about this
                dat_arr = Array.array('h')
                offset = 0
                dtyp = np.dtype('int16')
            else:
                raise NotImplementedError(' binary coding {pd}'
                                          .format(pd=prm_dict['BinaryCoding']))

    """
    fp = open(fetcher.basename + '.dat', 'rb')
    dat_arr.fromfile(fp, bytes/bytes_per_sample)
    fp.close()
    """
    dat_arr = np.fromfile(fetcher.basename + '.dat', dtyp)
    # print(dat_arr[0:10])
    # print(fetcher.config_name)

    if fetcher.timeOK:  # we have retrieve_t data!
        # check for ArrayDataType: float is float32
        # skip is 0 as there is no initial digitiser type token
        tprm_dict = read_prm_file(fetcher.basename + ".tprm", skip=0)
        if pyfusion.VERBOSE > 1:
            print(tprm_dict)
        ftype = tprm_dict['ArrayDataType'][0]
        floats = dict(float='float32', double='float64')
        timebase = np.fromfile(fetcher.basename + '.time', np.dtype(floats[ftype]))
    else:  # use the info from the .prm file
        clockHz = None

        if 'SamplingClock' in prm_dict:
            clockHz = double(prm_dict['SamplingClock'][0])
        if 'SamplingInterval' in prm_dict:
            clockHz = clockHz / double(prm_dict['SamplingInterval'][0])
        if 'ClockInterval(uSec)' in prm_dict:  # VSL dig
            clockHz = 1e6 / double(prm_dict['ClockInterval(uSec)'][0])
        if 'ClockSpeed' in prm_dict:
            if clockHz is not None:
                pyfusion.utils.warn('Apparent duplication of clock speed information')
            clockHz = double(prm_dict['ClockSpeed'][0])
            clockHz = LHD_A14_clk(fetcher.shot)  # see above
        if clockHz is not None:
            if 'PreSamples/Ch' in prm_dict:  # needed for "WE" e.g. VSL
                pretrig = float(prm_dict['PreSamples/Ch'][0]) / clockHz
            else:
                pretrig = 0.
            timebase = arange(len(dat_arr)) / clockHz - pretrig
        else:
            debug_(pyfusion.DEBUG, level=4, key='LHD read debug')
            raise NotImplementedError("timebase not recognised")

    debug_(pyfusion.DEBUG, level=4, key='LHD read debug')
    ch = Channel("{dn}-{dc}".format(dn=fetcher.diag_name,
                                    dc=fetcher.channel_number),
                 Coords('dummy', (0, 0, 0)))
    # if fetcher.gain != None:   # this may have worked once...not now!
    #     gain = fetcher.gain
    # else:
    # was - crude!!  if channel == 20: arr = -arr   # (MP5 and HMP13 need flipping)
    try:
        gain = float(fetcher.gain)
    except:
        gain = 1
    # dodgy - should only apply to a diag in a list - don't want to define
    # -MP5 separately - see other comment on "-"
    # if fetcher.diag_name[0] == '-':
    #     flip = -1
    # else:
    flip = 1
    # not sure if this needs a factor of two for RangePolarity,Bipolar (A14)
    rng = None
    for key in 'Range,Range(V)'.split(','):  # equivalent alternatives
        rng = prm_dict.get(key)
        if rng is not None:
            break

    scale_factor = flip * double(rng[0]) / (2 ** bits)
    # not sure how this worked before I added array() - but has using
    # array slowed things?  I clearly went to trouble using tailored ints above?
    # - yes, array(dat_arr) takes 1.5 sec for 4MS!!
    # Answer - using numpy fromfile('file', dtype=numpy.int16) - 16ms instead!
    # NOTE! ctype=int32 required if the array is already an np array
    # - can be fixed once Array code is removed (next version)
    output_data = TimeseriesData(timebase=Timebase(timebase),
                                 signal=Signal(scale_factor * gain *
                                               (array(dat_arr, dtype=np.int32) - offset)),
                                 channels=ch)
    # print(output_data.signal[0:5], offset, (array(dat_arr)-offset)[0:5])
    output_data.meta.update({'shot': fetcher.shot})
    output_data.config_name = fetcher.config_name
    output_data.params = prm_dict
    return output_data
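
# A worked numeric sketch (not part of the original source) of the raw-count
# to volts scaling applied above, for an offset_binary 14-bit digitiser with a
# 10 V range.  It assumes the module-level numpy import (np) used above; the
# numbers are illustrative only.
def _demo_offset_binary_scaling():
    bits = 14
    rng_volts = 10.0
    offset = 2 ** (bits - 1)                  # offset_binary: mid-scale reads as zero volts
    scale_factor = rng_volts / (2 ** bits)    # same form as flip*double(rng[0])/(2**bits)
    raw = np.array([8192, 9000, 7000], dtype=np.uint16)      # example raw counts
    return scale_factor * (raw.astype(np.int32) - offset)    # ~[0., 0.493, -0.728] volts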
def do_fetch(self):
    # Definitions:
    # data_utc: is meant to be the first and last timestamps of the saved
    #           or the retrieved data if not saved. (at least from 3 march 2016)
    # shot_f, shot_t: are beginning and end of shot (from programs) at least from 3 march 2016
    # utc: seems to be same as data_utc
    # seg_f_u: up till 20 march 2020, seg_f_u appears to be the *requested* segment
    #          (now fixed - renamed to req_f_u)
    # utc0 - only 9.9 or so. Should be in file or set in base.py
    # In this revision, the only changes are - allow for self.time_range,
    # variable names f, t changed to f_u, t_u (the desired data utc)
    # and comment fixes,
    # in preparation for including time_range and other cleanups.
    # My W7X shots are either of the form from_utc, to_utc,
    # or date (8dig) and shot (progId)
    # the format is in the acquisition properties, to avoid
    # repetition in each individual diagnostic
    t_start = seconds()
    if self.shot[1] > 1e9 or hasattr(self, 'time_range') and self.time_range is not None:
        # we have start and end in UTC instead of shot no
        # need the shot and utc to get t1 to set zero t
        if hasattr(self, 'time_range') and self.time_range is not None:
            if self.shot[1] > 1e9:
                raise ValueError("Can't supply shot as a utc pair and specify a time_range")
            actual_shot = self.shot
            f_u = None  # set to None to make sure we don't use it
        else:
            f_u, t_u = self.shot  # Initialize to the requested utc range
            actual_shot = get_shot_number([f_u, t_u])

        progs = get_programs(actual_shot)  # need shot to look up progs
        # need prog to get trigger - not tested for OP1.1
        if len(progs) > 1:
            raise LookupError('fetch: too many programs found - covers > 1 shot?')
        # this_prog = [prog for prog in progs if (f_u >= progs[prog]['from'] and
        #                                         t_u <= progs[prog]['upto'])]
        if len(progs) == 1:
            this_prog = list(progs.values())[0]
            # on shot 20180724,10, this trigger[1] is an empty list
            trigger = this_prog['trigger']['1']
            # This fallback to trigger[0] means that more rubbish shots are saved than
            # if we only look at the proper trigger (1) - here is an example
            # run pyfusion/examples/plot_signals shot_number=[20180724,10] diag_name="W7X_UTDU_LP15_I" dev_name='W7X'
            if len(trigger) == 0:  # example above
                print('** No Trigger 1 on shot {s}'.format(s=actual_shot))
                debug_(pyfusion.DEBUG, 0, key="noTrig1", msg="No Trigger 1 found")
                # take any that exist, at 60
                trigger = [trr[0] + int(60 * 1e9)
                           for trr in this_prog['trigger'].values() if len(trr) > 0]
                if len(trigger) == 0:
                    raise LookupError('No Triggers at all on shot {s}'.format(s=actual_shot))
            utc_0 = trigger[0]  # utc_0 is the first trigger (usu 61 sec ahead of data)
        else:
            print('Unable to look up programs - assuming this is a test shot')
            utc_0 = f_u  # better than nothing - probably a 'private' test/calibration shot
        if f_u is None:  # shorthand for 'we have a time range'
            f_u = utc_0 + int(1e9 * (self.time_range[0]))  # + 61))
            t_u = utc_0 + int(1e9 * (self.time_range[1]))  # + 61)) was 61 rel to prog['from']

    else:  # self.shot is an 8,3 digit shot and time range not specified
        actual_shot = self.shot
        f_u, t_u = get_shot_utc(actual_shot)
        # f_u is the start of the overall shot - i.e. about plasma time -61 sec.
        # at present, ECH usually starts 61 secs after t0
        # so it is usually sufficient to request a later start than t0
        pre_trig_secs = self.pre_trig_secs if hasattr(self, 'pre_trig_secs') else 0.3
        # should get this from programs really - code is already above. We need to just move it.
        pyfusion.utils.warn('fetch: still using hard-coded 61 secs')
        utc_0 = f_u + int(1e9 * (61))  # utc_0 is plasma initiation in utc
        f_u = utc_0 - int(1e9 * pre_trig_secs)  # f_u is the first time wanted

    # make sure we have the following defined regardless of how we got here
    shot_f_u, shot_t_u = get_shot_utc(actual_shot)
    req_f_u = f_u  # req_f_u is the start of the desired data segment - sim. for req_t_u
    req_t_u = t_u
    # A URL STYLE diagnostic - used for a one-off rather than an array
    # this could be moved to setup so that the error info is more complete
    if hasattr(self, 'url'):
        fmt = self.url  # add from= further down: +'_signal.json?from={req_f_u}&upto={req_t_u}'
        params = {}
        # check consistency -
        # url should be literal - no params (only for fmt) - gain, units are OK as they are not params
        if hasattr(self, 'params'):
            pyfusion.utils.warn('URL diagnostic {n} should not have params <{p}>'
                                .format(n=self.config_name, p=self.params))
    else:  # a pattern-based one - used for arrays of probes
        if hasattr(self, 'fmt'):  # does the diagnostic have one?
            fmt = self.fmt
        elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
            fmt = self.acq.fmt
        else:  # so far we have no quick way to check the server is online
            raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                              'edited because the url is not available')

        params = eval('dict(' + self.params + ')')

    # Originally added to fix up erroneous ECH alias mapping if ECH - only
    # 6 sources work if I don't.  But it seems to help with many others
    # This implementation is kludgey but proves the principle, and
    # means we don't have to refer to any raw.. signals
    # would be nice if they made a formal way to do this.
    if 'upto' not in fmt:
        fmt += '_signal.json?from={req_f_u}&upto={req_t_u}'

    assert req_f_u == f_u, 'req_f_u error'
    assert req_t_u == t_u, 'req_t_u error'
    params.update(req_f_u=req_f_u, req_t_u=req_t_u, shot_f_u=shot_f_u)
    url = fmt.format(**params)  # substitute the channel params

    debug_(pyfusion.DEBUG, 2, key="url", msg="middle of work on urls")
    if np.any([nm in url for nm in
               'Rf,Tower5,MainCoils,ControlCoils,TrimCoils,Mirnov,Interfer,_NBI_'.split(',')]):
        from pyfusion.acquisition.W7X.get_url_parms import get_signal_url
        # replace the main middle bit with the expanded one from the GUI
        tgt = url.split('KKS/')[1].split('/scaled')[0].split('_signal')[0]
        # construct a time filter for the shot
        self.tgt = tgt  # for the sake of error_info
        filt = '?filterstart={req_f_u}&filterstop={req_t_u}'.format(**params)
        # get the url with the filter
        url = url.replace(tgt, get_signal_url(tgt, filt)).split('KKS/')[-1]
        # take the filter back out - we will add the exact one later
        url = url.replace(filt, '/')

    # nSamples now needs a reduction mechanism http://archive-webapi.ipp-hgw.mpg.de/
    # minmax is increasingly slow for nSamples>10k, 100k hopeless.
    # Should ignore the test comparing the first two elements of the tb
    # prevent reduction (NSAMPLES=...) to avoid the bug presently in codac
    if (('nSamples' not in url) and (pyfusion.NSAMPLES != 0)
            and not (hasattr(self, 'allow_reduction') and int(self.allow_reduction) == 0)):
        url += '&reduction=minmax&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

    debug_(pyfusion.DEBUG, 2, key="url", msg="work on urls")
    # we need %% in pyfusion.cfg to keep py3 happy
    # however with the new get_signal_url, this will all disappear
    if sys.version < '3.0.0' and '%%' in url:
        url = url.replace('%%', '%')

    if 'StationDesc.82' in url:  # fix spike bug in scaled QRP data
        url = url.replace('/scaled/', '/unscaled/')

    if pyfusion.CACHE:
        # Needed for improperly configured cygwin systems: e.g. IPP Virtual PC
        # Perhaps this should be executed at startup of pyfusion?
        cygbin = "c:\\cygwin\\bin"
        if os.path.exists(cygbin) and not cygbin in os.environ['path']:
            os.environ['path'] += ";" + cygbin
        print('using wget on {url}'.format(url=url))
        retcode = os.system('wget -x "{url}"'.format(url=url))
        # retcode = os.system('c:\\cygwin\\bin\\bash.exe -c "/bin/wget {url}"'.format(url=url))
        debug_(retcode != 0, level=1, key='wget', msg="wget error or DEBUG='wget'")
        # now read from the local copy - seems like urls need full paths
        # appears to be a feature!
        # http://stackoverflow.com/questions/7857416/file-uri-scheme-and-relative-files
        # /home/bdb112/pyfusion/working/pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.181_DATASTREAM/7/Channel_7/scaled/_signal.json?from=1457626020000000000&upto=1457626080000000000&nSamples=10000
        # url = url.replace('http://','file:///home/bdb112/pyfusion/working/pyfusion/')
        url = url.replace('http://', 'file:/' + os.getcwd() + '/')
        if 'win' in os.sys.platform:
            # even though it seems odd, we want
            # 'file:/c:\\cygwin\\home\\bobl\\pyfusion\\working\\pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.192_DATASTREAM/4/Channel_4/scaled/_signal.json@from=147516863807215960&upto=1457516863809815961'
            url = url.replace('?', '@')  # nicer replace - readback still fails in Win, untested on unix systems
        print('now trying the cached copy we just grabbed: {url}'.format(url=url))

    if (req_f_u > shot_t_u) or (req_t_u < shot_f_u):
        pyfusion.utils.warn('segment requested is outside the shot times for '
                            + str(actual_shot))
    if pyfusion.VERBOSE > 0:
        print('======== fetching url over {dt:.1f} secs from {fr:.1f} to {tt:.1f} =========\n[{u}]'
              .format(u=url, dt=(params['req_t_u'] - params['req_f_u']) / 1e9,
                      fr=(params['req_f_u'] - shot_f_u) / 1e9,
                      tt=(params['req_t_u'] - shot_f_u) / 1e9))

    # seems to take twice as long as timeout requested.
    # haven't thought about python3 for the json stuff yet
    # This is not clean - should loop for timeout in [pyfusion.TIMEOUT, 3*pyfusion.TIMEOUT]
    try:
        # dat = json.load(urlopen(url,timeout=pyfusion.TIMEOUT)) works
        # but follow example in
        # http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
        # Some extracts in examples/howtoREST.py
        # dat = json.loads(urlopen(url,timeout=pyfusion.TIMEOUT).read().decode('utf-8'))
        t_pre = seconds()
        # for long shots, adjust strategy and timeout to reduce memory consumption
        ONE = 4  # memory conservation tricks only apply for DEBUG<1
        # This allows the cutoff value to be increased in some cases
        # uncomment the following two for testing the exception handler
        ## timeout = pyfusion.TIMEOUT
        ## raise httplib.IncompleteRead('test')
        if (req_t_u - req_f_u) / 1e9 > pyfusion.VERY_LONG:
            size_MS = 2 * (req_t_u - req_f_u) / 1e9  # approximate - later on calc from dt i.e. MSamples
            if pyfusion.NSAMPLES != 0:  # allow for subsampled data
                size_MS = pyfusion.NSAMPLES / 1e6
            timeout = 8 * size_MS + pyfusion.TIMEOUT  # don't make timeout too small!
            print('On-the-fly conversion: Setting timeout to {tmo}'.format(tmo=timeout))
            dat = json.load(urlopen(url, timeout=timeout))
            t_text = seconds()
        else:
            timeout = pyfusion.TIMEOUT
            txtform = urlopen(url, timeout=timeout).read()
            t_text = seconds()
            print('{tm} {tp:.2f} prep, {tt:.2f} fetch without decode, '
                  .format(tm=time.strftime('%H:%M:%S'),
                          tp=t_pre - t_start, tt=t_text - t_pre)),
            sys.stdout.flush()
            dat = json.loads(txtform.decode('utf-8'))
            if pyfusion.DEBUG < ONE:
                txtform = None  # release memory
        t_conv = seconds()
        # for 10MS of mirnov: 0.06 prep, 9.61 fetch, 19.03 conv
        # print('{tp:.2f} prep, {tt:.2f} fetch {tc:.2f} conv'.
        print('{tc:.2f} conv'.format(tp=t_pre - t_start, tt=t_text - t_pre,
                                     tc=t_conv - t_text))
    except socket.timeout as reason:
        # the following url access is a slightly different form?
        # should check if this is better tested by the URL module
        print('{tm} {tp:.2f} prep, {tt:.2f} timeout. '
              .format(tp=t_pre - t_start, tt=seconds() - t_pre,
                      tm=time.strftime('%H:%M:%S'))),
        print('****** first timeout error, try again with longer timeout *****')
        timeout *= 3
        dat = json.load(urlopen(url, timeout=timeout))
    except MemoryError as reason:
        raise  # too dangerous to do anything else except to reraise
    except httplib.IncompleteRead as reason:
        msg = str('** IncompleteRead after {tinc:.0f}/{timeout:.0f}s ** on {c}: {u} \n{r}'
                  .format(tinc=seconds() - t_start, c=self.config_name,
                          u=url, r=reason, timeout=timeout))
        pyfusion.logging.error(msg)
        # don't want to disturb the original exception, so raise <nothing> i.e. reraise
        raise  # possibly a memory error really? - not the case for 4114 20180912.48
    except Exception as reason:
        if pyfusion.VERBOSE >= 0:
            print('**** Exception (Memory? out of disk space?) OR timeout of {timeout} **** on {c}: {u} \n{r}'
                  .format(c=self.config_name, u=url, r=reason, timeout=timeout))
        raise  # re-raises the last exception

    # this form will default to repair = 2 for all LP probes.
    # default_repair = -1 if 'Desc.82/' in url else 0
    # Override acq.repair with the probe value
    default_repair = int(self.repair) if hasattr(self, 'repair') else 2 if 'Desc.82/' in url else 0
    # this form follows the config file settings
    self.repair = int(self.repair) if hasattr(self, 'repair') else default_repair
    dimraw = np.array(dat['dimensions'])
    if ('nSamples' not in url):  # skip this check if we are decimating
        if np.abs(req_f_u - dimraw[0]) > 2000:
            print('** Start is delayed by >2 us {dtdel:,} relative to the request'
                  .format(dtdel=dimraw[0] - req_f_u))
        if (req_t_u - dimraw[-1]) > 2000:
            print('** End is earlier by >2 us {dtdel:,} relative to the request'
                  .format(dtdel=req_t_u - dimraw[-1]))
    output_data_utc = [dat['dimensions'][0], dat['dimensions'][-1]]
    if pyfusion.DEBUG < ONE:
        dat['dimensions'] = None  # release memory
    # adjust dim only (not dim_raw) so that zero time remains at t1
    dim = dimraw - utc_0
    # decimation with NSAMPLES will make the timebase look wrong - so disable repair
    if pyfusion.NSAMPLES != 0 or self.repair == 0 or self.repair == -1:
        pass  # leave as is
    # need at least this clipping for Langmuir probes in Op1.1
    elif self.repair == 1:
        dim = np.clip(dim, 0, 1e99)
    elif self.repair == 2:
        dim, msg = regenerate_dim(dim)
        if msg is not None:
            print('shot {s}, {c}: {m}'.format(s=self.shot, c=self.config_name, m=msg))
    else:
        raise ValueError('repair value of {r} not understood'.format(r=self.repair))

    if pyfusion.VERBOSE > 2:
        print('repair', self.repair)
    # ch = Channel(self.config_name, Coords('dummy', (0,0,0)))
    # this probably should be in base.py
    coords = get_coords_for_channel(**self.__dict__)
    # used to be bare_chan? should we include - signs?
    ch = Channel(self.config_name, coords)
    scl = 1 / 3277.24 if dat['datatype'].lower() == 'short' else 1
    if self.repair == -1:
        output_data = TimeseriesData(timebase=Timebase(dimraw),
                                     signal=scl * Signal(dat['values']), channels=ch)
    else:
        output_data = TimeseriesData(timebase=Timebase(1e-9 * dim),
                                     signal=scl * Signal(dat['values']), channels=ch)
    output_data.meta.update({'shot': self.shot})
    # this copy was saved earlier so we could delete the large array to save space
    output_data.utc = output_data_utc
    output_data.units = dat['units'] if 'units' in dat else ''
    # this is a minor duplication - at least it gets saved via params
    params['data_utc'] = output_data.utc
    params['utc_0'] = utc_0  # hopefully t0 -- useful in npz files
    # Warning - this could slow things down! - but allows
    # corrupted time to be re-calculated as algorithms improve.
    # and storage as differences takes very little space.
    params['diff_dimraw'] = dimraw
    params['diff_dimraw'][1:] = np.diff(dimraw)
    if pyfusion.DEBUG < ONE:
        dimraw = None
    # NOTE!!! need float128 to process dimraw, and cumsum won't return ints
    # or automatically promote to 128bit (neither do simple ops like *, +)
    params['pyfusion_version'] = pyfusion.version.get_version()
    if pyfusion.VERBOSE > 0:
        print('shot {s}, config name {c}'.format(c=self.config_name, s=self.shot))

    output_data.config_name = self.config_name
    debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
    output_data.params = params

    ### the total shot utc.
    output_data.utc = [f_u, t_u]

    return output_data
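
# A hedged usage sketch (not part of the original source): fetching a W7-X
# signal through the usual pyfusion front end, which routes to do_fetch()
# above.  The shot and diagnostic name are taken from the plot_signals example
# quoted in the comments; substitute whatever is defined in your pyfusion.cfg.
def _demo_w7x_fetch():
    import pyfusion
    dev = pyfusion.getDevice('W7X')
    data = dev.acq.getdata([20180724, 10], 'W7X_UTDU_LP15_I')
    print(data.utc, data.units)
    return data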