version=0 # 0 fixed ctrs, 1 evolves """ exec(_var_defaults) #exec(';'.join(_var_defaults.split())) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) # Slow: around 1000 13 element lists/second - probably could eliminate # more than one at a time. However it may make more sense to look for # clusters - this will work if there are two types of shapes. from pyfusion.data.DA_datamining import DA, report_mem, append_to_DA_file if DAfile != '': da = DA(DAfile, load=1) else: print('try for a da in locals()') try: da except NameError as reason: print(' no da - did you use run -i?') wh = da['indx'] # all of them #wh = np.where((btw(da['freq'],2,4)) & (da['amp']>0.012) )[0] wh = np.where(da['shot'] == 180904035)[0] phs = da['phases'][wh].tolist() inds = da['indx'][wh].tolist() ctrs = np.mean(modtwopi(phs, 0), 0) num = len(ctrs)
""" Example 5 - Figure 7 Adding the Alfven speed for each item into the data set. colour """ from pyfusion.data.DA_datamining import DA import numpy as np import matplotlib.pyplot as plt plt.figure('Example 5 - Figure 7') DA766 = DA('H1_766.npz',load=1) # Load data from 30 shots into a dictionary of arrays (DA) DA766.extract(locals()) # Extract all the data into local variables, such as freq, t_mid, phases[0] wb = np.where((amp>0.05) & (a12>0.7))[0] # find large amplitude, 'rotating' modes (see text). mp = 1.67e-27 mu_0 = 4 * np.pi * 1e-7 V_A = b_0/np.sqrt(mu_0 * n_e * 1e18* mp) # Calculate VA as a local variable , an array of the same size as the # extracted data DA766.update(dict(V_A=V_A)) # Add it to the dictionary - no problem if you use the same name # for the dictionary key DA766.save('H1_766_new') # Save the data set (under a new name if you prefer). # When you load the dataset in the future, the variable V_A will be there. plt.plot(k_h, freq*1e3/V_A,'.c') # symbol '.', colour cyan - we see the values are in the range sqrt beta plt.plot(k_h[wb], freq[wb]*1e3/V_A[wb],'ob',ms=12) # selecting large amplitude as above, we see beginnings of fig 5 plt.xlabel(r'$k_H$'); plt.ylabel(r'${\rm frequency/V_{Alfven}}$',fontdict={'fontsize':'large'}) plt.show() # Note - there will be a warning "invalid value encountered in sqrt" due to noise in n_e # This could be eliminated by selecting using np.where(n_e >= 0)[0]
""" Script to plot probe signals in time for the Marfe shot 52 for Uwe, IAEA """ import matplotlib.pyplot as plt from pyfusion.data.DA_datamining import Masked_DA, DA from pyfusion.data.signal_processing import smooth_n plt.rc('font', size=18, weight='normal') plt.rc('lines', lw=2) DA523=DA('LP/to_uwe/LP20160309_52_L53_2k2.npz') DA513=DA('LP/to_uwe/LP20160309_51_L53_2k2.npz') DA527=DA('LP/to_uwe/LP20160309_52_L57_2k2.npz') DA517=DA('LP/to_uwe/LP20160309_51_L57_2k2.npz') DA7 = DA527 fig, axs = plt.subplots(2, 1, sharey='none', sharex='all') axn, axT = axs for ch in [1,4,5,6,7]: axT.plot(DA7['t'],DA7['Te'][:,ch], label=DA7['info']['channels'][ch],lw=2) axn.plot(DA7['t'],DA7['ne18'][:,ch], label=DA7['info']['channels'][ch],lw=2) axn.legend() axT.legend() axT.set_ylabel('Te (ev)') axn.set_ylabel('ne/10^18') axT.set_ylim(0,40) axn.set_ylim(0,16) axn.set_xlim(0.40, 0.65)
# work through each time segment, extracting the average density during that time for seg in sections: ne_profile.append(np.average(seg.signal,axis=1)) # axis=1 -> average over time, not channel t_mid.append(np.average(seg.timebase)) shot.append(shot_number) except Exception as reason: bads.append([shot_number, reason]) msg = 'skipping shot {s}' if verbose>0: msg += ' because\n {r}' print(msg.format(s=shot_number, r=reason)) # store the data in a DA (Dictionary of Arrays) object, which is like a DataFrame in R or python panda #myDA = pf.data.DA_datamining.Masked_DA( # DAordict = dict(shot=shot, ne_profile=np.array(ne_profile), t_mid=t_mid), valid_keys=['ne_profile'], mask=0) myDA = DA(dict(shot=shot, ne_profile=np.array(ne_profile), t_mid=t_mid)) myDA.masked = Masked_DA(valid_keys=['ne_profile'], baseDA=myDA) myDA.da['mask'] = np.ones(shape=np.shape(myDA[myDA.masked.valid_keys[0]])).astype(np.uint8) channels = [ch.name.split(':')[-1] for ch in seg.channels] myDA.infodict.update(dict(channels = channels)) for (c, ch) in enumerate(channels): if np.any([ex in ch for ex in exclude]): # if any of exclude in that channel myDA.da['mask'][:,c] = 0 myDA.save('/tmp/density_scan') # the next step - write to arff # myDA.write_arff('ne_profile.arff',['ne_profile']) import matplotlib.pyplot as plt def pl(array, comment=None,**kwargs): plt.figure(num=comment) # coment written to window title plt.plot(array, **kwargs)
# Time reference: the ECH sample selected by wech[0].  The utc values are
# divided by 1e9 below, i.e. treated as nanoseconds.
tech = echdata.timebase[wech[0]]
t0_utc = int(tech * 1e9) + echdata.utc[0]
# offsets (s) of the probe and ECE time bases relative to that reference
dt = (probedata.utc[0] - t0_utc)/1e9
dtece = (ECEdata.utc[0] - t0_utc)/1e9

# gas_g=[3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1]
# time_g=[0, 0.099, 0.1, 0.155, 0.156, 0.162, 0.163, 0.196, 0.197, 0.211, 0.212, 0.24, 0.241, 0.266, 0.267, 0.290, 0.291, 0.327, 0.328, 0.341, 0.342, 0.8]
# Corrected using Maciej's email
# NOTE(review): 0.191 appears twice in a row below, whereas every other
# transition is a pair 1 ms apart - possibly a typo for 0.190; confirm.
time_g = [0, 0.099, 0.1, 0.149, 0.150, 0.160, 0.161, 0.191, 0.191, 0.210,
          0.211, 0.230, 0.231, 0.260, 0.261, 0.280, 0.281, 0.320, 0.321,
          0.340, 0.341, 0.8]
gas_g = [3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1,
         3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1]

# LP5342.process_swept_Langmuir(t_range=[0.9,1.4],dtseg=.002,initial_TeVpI0=dict(Te=20,Vp=0,I0=None),filename='20160218_5_L532_short')
#da532=DA('20160218_5_L532_short.npz')
base = str('LP/LP{d}_{s}_L'.format(d=shot_number[0], s=shot_number[1]))
# fall back to 4ms versions if no 2ms version
try:
    da532 = DA(base+'532.npz')
except IOError:
    da532 = DA(base+'53.npz')

try:
    da572 = DA(base+'572.npz')
except IOError:
    da572 = DA(base+'57.npz')

# The first plot used to pass hold=0 to clear the axes.  That keyword was
# removed in Matplotlib 3.0 (it now raises), so clear the axes explicitly.
plt.cla()
ch1 = 0
plt.plot(da572['t_mid']+dt, da572['ne18'][:, ch1],
         label='ne18 s' + da572['info']['channels'][ch1][3:])
ch2 = 1
plt.plot(da532['t_mid']+dt, da532['ne18'][:, ch2],
         label='ne18 s' + da532['info']['channels'][ch2][3:])
#plt.plot(np.array(time_g), 0.8+(np.array(gas_g)-3),label='gas valve')
plt.plot(ECEdata.timebase + dtece, ECEdata.signal/1000,
         label='ECE_13', color='red', lw=0.3)
plt.plot(echdata.timebase - tech, echdata.signal/1000,
         label='ECH (MW)', color='magenta', lw=0.3)
plt.legend()
#run pyfusion/examples/small_65.py import numpy as np import pylab as pl import pyfusion from pyfusion.data.DA_datamining import DA from pyfusion.data.convenience import whr, btw, his, decimate size_scale=30 cind = 0 colorset=('b,g,r,c,m,y,k,orange,purple,lightgreen,gray'.split(',')) # to be rotated DA65MPH=DA('DA65MP2010HMPno612b5_M_N_fmax.npz',load=1) DA65MPH.extract(locals()) pyfusion.config.set('Plots',"FT_Axis","[0.5,4,0,80000]") """ run -i pyfusion/examples/plot_specgram.py shot_number=65139 channel_number=1 NFFT=1024 pl.set_cmap(cm.gray_r) pl.clim(-60,-0) """ sc_kw=dict(edgecolor='k',linewidth = 0.3) for n in (-1,0,1): for m in (-2, -1,1,2): w =np.where((N==n) & (M==m) & (_binary_svs < 99) & btw(freq,frlow,frhigh))[0] if len(w) != 0: col = colorset[cind] pl.scatter(t_mid[w], freq[w], size_scale*amp[w], color=col, label='m,n=~{m},{n}'.format(m=m, n=n),**sc_kw) cind += 1 w=np.where((_binary_svs < 99) & btw(freq,frlow,frhigh) & btw(MM, 0,130) & (NN== -4))[0] col = colorset[cind] ; cind+=1
maxshot = 999999 # higher than even LHD shot_list = [] diags = diag_basic diags_scalar = "b_0,R_ax,Quad,Gamma".split(',') filename = 'DA_file.npz' import pyfusion.utils exec(pyfusion.utils.process_cmd_line_args()) # now to merge the two. if len(np.shape(diags)) == 0: diags = [diags] try: dd except: print('trying file ' + filename) thisDA = DA(filename, load=1) thisDA.info() dd = thisDA.da sz = len(dd['shot']) missing_shots = [] good_shots = [] ctr = 0 if len(shot_list) == 0: shots = np.unique(dd['shot']) wgt = np.where((shots >= minshot) & (shots <= maxshot)) shot_list = shots[wgt] for shot in shot_list:
maxshot=999999 # higher than even LHD shot_list = [] diags=diag_basic diags_scalar="b_0,R_ax,Quad,Gamma".split(',') filename='DA_file.npz' import pyfusion.utils exec(pyfusion.utils.process_cmd_line_args()) # now to merge the two. if len(np.shape(diags)) == 0: diags = [diags] try: dd except: print('trying file ' + filename) thisDA = DA(filename,load=1) thisDA.info() dd = thisDA.da sz = len(dd['shot']) missing_shots = [] good_shots =[] ctr=0 if len(shot_list)==0: shots = np.unique(dd['shot']) wgt = np.where((shots >= minshot) & (shots <= maxshot)) shot_list = shots[wgt] for shot in shot_list:
# more trouble than its worth? for example, mode_list=None overides its effect #exec(pyfusion.utils.process_cmd_line_args()) if mask is None: mask = np.identity(len(sel)) if DAfilename is None: raise ValueError('New version - no need for DAfilename==None') try: type(thisDA) except: thisDA=None if (thisDA is None) or thisDA.name != DAfilename: print("reading in {d}".format(d=DAfilename)) from pyfusion.data.DA_datamining import DA, report_mem thisDA = DA(DAfilename,load=1) # wasteful for large files: dd=thisDA.copyda() dd = thisDA.da if clear_modes: old_modes = {} if verbose>0: print('clearing modes') for key in 'N,NN,M,MM,mode_id'.split(','): #print(key, len(dd[key])) old_modes.update({key: dd.pop(key,None)}) #clear all the mode keys if not isinstance(dd, dict): raise LookupError("dd not loaded into memory - can't store") if mode is None: mode = mode_list[0]
from matplotlib import pyplot as plt from pyfusion.data.DA_datamining import DA, report_mem, append_to_DA_file from pyfusion.data.convenience import between, bw, btw, decimate, his, broaden # paste NOt %PASTE #_PYFUSION_TEST_@@SCRIPT run -i pyfusion/examples/gen_fs_bands.py dev_name='W7X' diag_name=W7X_MIRNOV_41_BEST_LOOP shot_range="[[20180912,s] for s in range(43,44)]" max_bands=1 info=0 min_svs=2 max_H=0.999 min_p=0 exception=() outfile='W7X_MIR/preproc/201809/PMIRNOV_41_BEST_LOOP_10_3m_20180912043' fmax=10e3 seg_dt=3e-3 min_svs=2 # clean up not usually needed run -i pyfusion/examples/clean_up_pyfusion_text_mp.py MP=0 fileglob="'W7X_MIR/preproc/201809/P*'" run -i pyfusion/examples/merge_text_pyfusion.py file_list="np.sort(glob('W7X_MIR/preproc/201809/PMIRNOV_41_BEST_LOOP_10_3m_20180912043'))" run pyfusion/examples/plot_specgram.py diag_name='W7X_MIR_4136' shot_number=[20180912,43] NFFT=2048*4 plot_fs_DA(dd);pl.ylim(0,30) from pyfusion.data.DA_datamining import DA, report_mem, append_to_DA_file DA43MIRNOV_13_BEST_LOOP=DA(dd) DA43MIRNOV_13_BEST_LOOP.save('/tmp/DAMIRNOV_41_13_BEST_LOOP_10_3ms_20180912043.npz") dd = DA("DAMIRNOV_41_13_BEST_LOOP_10_3ms_20180912043.npz") w=where((btw(dd['freq'],2,4)) & (dd['amp']>0.012) )[0] plt.plot(dd['phases'][w].T) # prettier plots plt.rcParams['font.size']=20 phs=array([modtwopi(ph,offset=-1) for ph in dd['phases']]) w3=where((btw(dd['freq'],2,4)) & (dd['amp']>0.012) )[0] plt.plot(phs[w3].T) plt.figure() w6=where((btw(dd['freq'],5,7)) & (dd['amp']>0.015) )[0];len(w6) plt.plot(phs[w6].T) title('6kHz mode') ylabel('phase dif n, n-1') xlabel('probe pairs (n,n-1)')
dt = (probedata.utc[0] - t0_utc) / 1e9 # gas_g=[3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1] # time_g=[0, 0.099, 0.1, 0.155, 0.156, 0.162, 0.163, 0.196, 0.197, 0.211, 0.212, 0.24, 0.241, 0.266, 0.267, 0.290, 0.291, 0.327, 0.328, 0.341, 0.342, 0.8] # Corrected using Maciej's email time_g = np.array([ 0, 0.099, 0.1, 0.149, 0.150, 0.160, 0.161, 0.191, 0.191, 0.210, 0.211, 0.230, 0.231, 0.260, 0.261, 0.280, 0.281, 0.320, 0.321, 0.340, 0.341, 0.8 ]) gas_g = np.array([ 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1 ]) # LP5342.process_swept_Langmuir(t_range=[0.9,1.4],dtseg=.002,initial_TeVfI0=dict(Te=20,Vf=0,I0=None),filename='20160309_42_L532_short') #da532=DA('20160309_42_L532_short.npz') da532 = DA('LP/LP20160309_42_L53_2k2.npz') #da532=DA('LP/LP20160309_52_L53_2k2.npz') #da532=DA('LP/LP20160309_44_L53.npz') da572 = DA('LP/LP20160309_42_L57_2k2.npz') #da572=DA('LP/LP20160309_52_L57_2k2.npz') # offsets for medium text, not including dot size, second element is the probe number to avoid voffs = dict(LP17=[.002, 20], LP10=[.002, 6], LP09=[0.001, 5], LP06=[-.001, 1], LP12=[.002, 14]) fig, (axcorr, axmap) = plt.subplots(2, 1) da = da532 chans = np.array(da['info']['channels'])
#!/usr/bin/env python
"""Plot one variable from a DA_datamining npz file.

Usage: plot_npz_data "filename" [key]

key defaults to 'ne18'.  With no filename, the usage line is printed and
nothing is plotted.
"""
# this won't work from ~/bin unless we include pyfusion in PYTHONPATH
#import os
#print(os.path.dirname(__file__))
import matplotlib.pyplot as plt
import sys
sys.path.append('/home/bdb112/pyfusion/working/pyfusion/')
from pyfusion.data.DA_datamining import DA

if len(sys.argv) < 2:
    print('plot_npz_data "filename" key')
else:
    filename = sys.argv[1]
    key = sys.argv[2] if len(sys.argv) > 2 else 'ne18'

    da = DA(filename, load=1)
    da.plot(key)
    # `block` is keyword-only in current Matplotlib: plt.show(1) raises
    # TypeError.  block=True keeps the window open until it is closed.
    plt.show(block=True)
# time_g=[0, 0.099, 0.1, 0.155, 0.156, 0.162, 0.163, 0.196, 0.197, 0.211, 0.212, 0.24, 0.241, 0.266, 0.267, 0.290, 0.291, 0.327, 0.328, 0.341, 0.342, 0.8] # Corrected using Maciej's email time_g = [ 0, 0.099, 0.1, 0.149, 0.150, 0.160, 0.161, 0.191, 0.191, 0.210, 0.211, 0.230, 0.231, 0.260, 0.261, 0.280, 0.281, 0.320, 0.321, 0.340, 0.341, 0.8 ] gas_g = [ 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1, 3.7, 3.7, 3.1, 3.1 ] # LP5342.process_swept_Langmuir(t_range=[0.9,1.4],dtseg=.002,initial_TeVpI0=dict(Te=20,Vp=0,I0=None),filename='20160218_5_L532_short') #da532=DA('20160218_5_L532_short.npz') base = str('LP/LP{d}_{s}_L'.format(d=shot_number[0], s=shot_number[1])) # fall back to 4ms versions if no 2ms version try: da532 = DA(base + '532.npz') except IOError: da532 = DA(base + '53.npz') try: da572 = DA(base + '572.npz') except IOError: da572 = DA(base + '57.npz') ch1 = 0 plt.plot(da572['t_mid'] + dt, da572['ne18'][:, ch1], hold=0, label='ne18 s' + da572['info']['channels'][ch1][3:]) ch2 = 1 plt.plot(da532['t_mid'] + dt, da532['ne18'][:, ch2],
skipped = [] figs = [] num = None # None auto numbers figures figsize = (10, 7) if len(figs) > 0 else plt.rcParams[ 'figure.figsize'] # first figure is rc default size fig, (axu, axl) = plt.subplots(2, 1, sharex=True, num=num, figsize=np.array(figsize)[::-1]) # see N2_puff_correlation for the vertical offsets of text point labels for ax, seg in zip([axu, axl], ['3', '7']): sgn = None da = DA(dafile.replace('SEG', seg)) if (len(da['Te'][0]) < minpts): raise LookupError( 'fewer channels ({nc}) than minpts'.format(nc=len(da['Te'][0]))) try: if 'params' in da['info']: if da['info']['params']['pyfusion_version'] > '0.6.7b': areas = 'approximate' else: print('*** Warning - incorrect limiter numbers? ***') except: print('******* Really Old data file??***********') # take care of obsolete names ne = 'ne18' if 'ne18' in da.da else 'ne' Vf = 'Vf' if 'Vf' in da.da else 'Vp'
print('Only {n} files found for {fl}'.format(fl=file_list, n=len(file_list))) (ds_list, comment_list) = read_text_pyfusion(file_list, debug=debug, exception=exception, target=target) if len(ds_list) == 0: raise LookupError('no valid files found in the {n} files in {f}' .format(f=file_list,n=len(file_list))) if isinstance(file_list_in, str): comment_list = [file_list_in] + comment_list if append_old_method: ds_list.append(dd) comment_list.extend(dd['info']['comment']) if append: dd = merge_ds(ds_list, comment_list, old_dd=dd) else: dd = merge_ds(ds_list, comment_list) if save_filename != "None": from pyfusion.data.DA_datamining import DA DAtest=DA(dd) DAtest.info() DAtest.save(save_filename)
if len(file_list)<10: print('Only {n} files found for {fl}'.format(fl=file_list, n=len(file_list))) (ds_list, comment_list) = read_text_pyfusion(file_list, debug=debug, exception=exception, target=target) if len(ds_list) == 0: raise LookupError('no valid files found in the {n} files in {f}' .format(f=file_list,n=len(file_list))) if append_old_method: ds_list.append(dd) comment_list.extend(dd['info']['comment']) if append: dd = merge_ds(ds_list, comment_list, old_dd=dd) else: dd = merge_ds(ds_list, comment_list) if save_filename != "None": from pyfusion.data.DA_datamining import DA DAtest=DA(dd) DAtest.info() DAtest.save(save_filename)
_var_defaults = """ DAfilename='../../datamining/DA/PF2_130813_50_5X_1.5_5b_rms_1_diags.npz' outfilename=None keep_open = 0 complevel=2 complib = 'zlib' # 'blosc' is fastest, but zlib can be compressed further "offline" # with blosc, get dataset </w_p> cannot be read, user defined filter is not available var='phases' # ?? maybe was used to develop """ exec(_var_defaults) exec(process_cmd_line_args()) filters = tb.Filters(complevel=complevel, complib=complib) dd = DA(DAfilename).da if outfilename is None: (base, ext) = os.path.splitext(os.path.realpath(DAfilename)) outfilename = base + os.path.extsep + 'h5' outf = tb.openFile(outfilename, "a") for var in dd.keys(): st_copy = seconds() if var in [nd.name for nd in outf.listNodes('/')]: raise LookupError('{f} already has a node "{n}"'.format( f=outf.filename, n=var)) val = dd[ var] # need to hold it in memory this way to avoid multiple access
hold=1 shot_number = sht cmap=cm.jet # see also cm.gray_r etc tm=localtime() hdr = str('PF2_{yy:02d}{mm:02d}{dd:02d}_' .format(yy=tm.tm_year-2000,mm=tm.tm_mon,dd=tm.tm_mday,hh=tm.tm_hour)) # a local da wins, if not there then try DA_file try: da except: print('No da, check if there is a file') if DA_file is not None: try: da = DA(DA_file,load=1) except: print('DA_file {df} not found'.format(df=DA_file)) flucfiles = '{hdr}*{sht}*'.format(sht=sht,hdr=hdr) if not sht in da.da['shot']: print('shot {sht} not found, highest in {n} is {h}' .format(sht=sht,n=da.name,h=np.max(da.da['shot']))), # look for the flucfile remotely print(' acessing remote - may hang if sshfs') if len(glob('/h1svr2/tmp/'+flucfiles))>0: flucfiles = '/h1svr2/tmp/' + flucfiles remerge=1 # print('using remote files')
clearfigs = 1 # set to 0 to overlay (not great) """ exec(_var_defaults) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) try: da if oldDAfilename != DAfilename: 1 / 0 # create an error to force reload print("Using old data") except: print("loading {f}".format(f=DAfilename)) da = DA(DAfilename) oldDAfilename = DAfilename da.extract(locals(), "shot,phases,beta,freq,frlow,frhigh,t_mid,amp,a12") print("loading {f}".format(f=clusterfile)) x = np.load(clusterfile) for k in x.keys(): exec("{v}=x['{k}']".format(v=k, k=k)) start_mem = report_mem(msg="cluster_phases") w5 = np.where((dists(subset[clinds[cl][0]], phases[:, sel]) < d_big) & (bw(freq, frlow, frhigh)) & (shot == shot))[0] print(len(w5), len(unique(shot[w5]))) ph5 = phases[w5] wc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_med)[0] wcc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_sml)[0]
nl = '\n' ofilename = None LPfile = 'LP/20160224_25_L53.npz' if len(sys.argv) > 1 and sys.argv[1][0] != '-': LPfile = sys.argv[1] if ofilename is None: path, nameext = os.path.split(LPfile) (name, ext) = os.path.splitext(nameext) ofilename = os.path.join(path, name + '.csv') jfilename = os.path.join(path, name + '.json') try: from pyfusion.data.DA_datamining import DA da = DA(LPfile) dat = da.da masked = da.masked except ImportError: raise datnpz = np.load(LPfile) dat = {} for k in datnpz: dat[k] = datnpz[k].tolist() masked = dat info = dat.get('info') params = info['params'] channels = info['channels']
"""Overlay Te from the lpf=99 fit on the per-channel Te plots of the lpf=9 fit."""
from pyfusion.data.DA_datamining import DA
from matplotlib import pyplot as plt

da9 = DA('LP20160309_42_L57_errest_cov_lpf=9.npz')
da99 = DA('LP20160309_42_L57_errest_cov_lpf=99.npz')

# da9.plot returns a sequence of axes that is indexed by channel below
axs = da9.plot('Te', sharey='all')
for i in range(len(da99['info']['channels'])):
    # masked Te of the second data set on the matching channel's axes
    axs[i].plot(da99['t_mid'], da99.masked['Te'][:, i])

plt.ylim(0, 60)
# `block` is keyword-only in current Matplotlib: plt.show(0) raises TypeError.
plt.show(block=False)
""" A VERY small MP+HMP set, just shot 65139 - note that the copy and the extract() both duplicate data, which is good for development and debugging, but wasteful of space. """ from pyfusion.data.DA_datamining import DA from pyfusion.utils.utils import fix2pi_skips, modtwopi from pyfusion.visual.sp import sp from pyfusion.data.convenience import between, btw, bw, decimate, his, broaden _var_defaults=""" DAfilename='DA65MP2010HMPno612b5_M_N_fmax.npz' """ exec(_var_defaults) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) DA65=DA(DAfilename,load=1) dd=DA65.copyda() DA65.extract(locals()) DA65.info() print('DA65', DAfilename)
"""Plot the per-channel mean of I0 after rejecting out-of-range samples."""
import numpy as np
from pyfusion.data.DA_datamining import Masked_DA, DA
from matplotlib import pyplot as plt

dal = DA('LP/LP20170920_7_UTDU_2k8.npz')
I0arr = dal['I0']

# Element-wise mask of out-of-range samples.  The previous
# np.where(...)[0] kept only the first-axis indices, so assigning NaN
# through it blanked complete rows even when one channel was bad, while
# the reported fraction was counted per element.
bad = (I0arr > .02) | (I0arr < 0)
n_bad = int(np.count_nonzero(bad))
rejected = float(n_bad) / I0arr.size
# report the count, not the index array
print('{n} rejected ({pc:.0f}%)'.format(n=n_bad, pc=rejected*100))

I0arr[bad] = np.nan
# mean over axis 0, ignoring the rejected samples
IUpper = np.nanmean(I0arr, axis=0)
plt.step(range(len(IUpper)), IUpper)
# `block` is keyword-only in current Matplotlib: plt.show(0) raises TypeError.
plt.show(block=False)
""" extract selected shots from a pytables hdf5 file Algorithm should be applicable for any 'on disk' variable format """ import pyfusion from pyfusion.data.DA_datamining import DA import numpy as np dd = DA('$DA/DAHJ59k.npz', load=1).copyda() shots = dd['shot'] ushots = np.unique(shots) np.random.seed(0) # ensure repeatability = # chose 10 randomly myshots = ushots[np.random.uniform(len(ushots), size=10).astype(int)] inds = [] for shot in myshots: inds.extend(np.where(shots == shot)[0]) # 1,2,3,4,6,7,8, #diff 1,1,1,2,1,1,1 #ones 1,1,1,0,1,1,1 #ups 3 #downs 2, # sort in the order of the table - not necessarily shot order inds = np.sort(inds) ones = (np.diff(inds) == 1).astype(int)
clearfigs = 1 # set to 0 to overlay (not great) """ exec(_var_defaults) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) try: da if oldDAfilename != DAfilename: 1 / 0 # create an error to force reload print('Using old data') except: print('loading {f}'.format(f=DAfilename)) da = DA(DAfilename) oldDAfilename = DAfilename da.extract(locals(), 'shot,phases,beta,freq,frlow,frhigh,t_mid,amp,a12') print('loading {f}'.format(f=clusterfile)) x = np.load(clusterfile) for k in x.keys(): exec("{v}=x['{k}']".format(v=k, k=k)) start_mem = report_mem(msg='cluster_phases') w5 = np.where((dists(subset[clinds[cl][0]], phases[:, sel]) < d_big) & (bw(freq, frlow, frhigh)) & (shot == shot))[0] print(len(w5), len(unique(shot[w5]))) ph5 = phases[w5] wc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_med)[0] wcc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_sml)[0]
exec(pf.utils.process_cmd_line_args())

# prepare an empty lists for data - lists are easy to append to
ne_profile = []
shot = []
t_mid = []
dev = pf.getDevice(device_name)  # open the device (choose the experiment)

for shot_number in shot_range:
    try:
        d = dev.acq.getdata(shot_number, ne_set)
        if time_range is not None:
            d.reduce_time(time_range)
        sections = d.segment(n_samples, overlap)
        print(d.history, len(sections))
        # one entry per time segment, appended to all three lists in step
        for t_seg in sections:
            ne_profile.append(np.average(t_seg.signal, axis=1))
            t_mid.append(np.average(t_seg.timebase))
            shot.append(shot_number)
    except exception as reason:
        # `exception` is defined outside this section.
        # Report the shot that failed (shot_number), not the accumulated
        # `shot` list; a single-argument print() call prints the same text
        # on Python 2 and 3.
        print('Error {e} on shot {s}'.format(e=reason, s=shot_number))

# make the dictionary of arrays and put it in a DA object
myDA = DA(dict(shot=shot, ne_profile=ne_profile, t_mid=t_mid))
myDA.save('ne_profile')
_var_defaults = """ DAfilename='../../datamining/DA/PF2_130813_50_5X_1.5_5b_rms_1_diags.npz' outfilename=None keep_open = 0 complevel=2 complib = 'zlib' # 'blosc' is fastest, but zlib can be compressed further "offline" # with blosc, get dataset </w_p> cannot be read, user defined filter is not available var='phases' # ?? maybe was used to develop """ exec(_var_defaults) exec(process_cmd_line_args()) filters=tb.Filters(complevel=complevel, complib=complib) dd = DA(DAfilename).da if outfilename is None: (base, ext) = os.path.splitext(os.path.realpath(DAfilename)) outfilename = base + os.path.extsep + 'h5' outf = tb.openFile(outfilename, "a") for var in dd.keys(): st_copy = seconds() if var in [nd.name for nd in outf.listNodes('/')]: raise LookupError('{f} already has a node "{n}"' .format(f=outf.filename, n=var)) val = dd[var] # need to hold it in memory this way to avoid multiple access sizebytes = val.nbytes
""" A small MP+HMP set, just shot 65139 - note that the copy and the extract() both duplicate data, which is good for development and debugging, but wasteful of space. """ from pyfusion.data.DA_datamining import DA from pyfusion.utils.utils import fix2pi_skips, modtwopi from pyfusion.visual.sp import sp from pyfusion.data.convenience import between, bw, btw, decimate, his, broaden _var_default = """ DAfilename='300_small.npz' """ exec(_var_default) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) DA300 = DA(DAfilename, load=1) dd = DA300.copyda() DA300.extract(locals()) DA300.info() print('DA300', DAfilename)
print('Selecting before clustering ') phases = phases[sel, :] # WAS phases[:,sel] - how did it ever work? for k in misc.keys(): misc.update({k: misc[k][sel]}) # this was missing too fo = clust.feature_object(phases, misc) co = fo.cluster(method='EM_VMM', n_clusters=n_clusters, n_iterations=n_iterations, start='k_means', n_cpus=n_cpus, number_of_starts=number_of_starts) co.plot_clusters_phase_lines() # show clusters # extract the meta data corresponding to the instances selected DA(DAfilename, load=1).extract(locals(), inds=co.feature_obj.misc_data_dict['serial']) while cl >= 0: cl = int(input('\n cluster number to view, -1 or ^C to quit? ')) w = co.members(cl) pl.figure('cluster {cl}'.format(cl=cl)) pl.scatter(t_mid[w], freq[w], 500 * amp[w]) pl.ylim(0, max(freq) / 2) pl.show(0) """ # simplest possible version run pyfusion/examples/medium_300.py import pyfusion.clustering as clust phases=DA300.da.pop('phases') fo = clust.feature_object(phases, DA300.da)
#!/usr/bin/env python
""" Show the info dict of a DA_datamining npz file

This won't work from ~/bin unless we include pyfusion in PYTHONPATH -
do this with a crude hack below
import os
print(os.path.dirname(__file__))

_PYFUSION_TEST_@@pyfusion/examples/JSPF_tutorial/H1_766.npz
_PYFUSION_TEST_@@pyfusion/examples/JSPF_tutorial/LP20160309_52_L53_2k2short.npz
"""
import matplotlib.pyplot as plt
import sys
# the crude hack: make the pyfusion working copy importable
sys.path.append('/home/bdb112/pyfusion/working/pyfusion/')
from pyfusion.data.DA_datamining import DA

# guard clause: a filename argument is mandatory
if len(sys.argv) < 2:
    raise Exception('Syntax is: DA_info "filename"')

filename = sys.argv[1]

from pyfusion.data.DA_datamining import Masked_DA, DA
da = DA(filename)
print(da['info'])
It is more effective (e.g. Figure 5) to use von Mises mixtures. This (vMMM) code is under development - contact the authors for the latest copy. Takes about 1 minute Python3 produces a different clustering to Python2 - not sure why? """ from sklearn import mixture from pyfusion.data.DA_datamining import DA, report_mem import numpy as np import matplotlib.pyplot as plt # approx size used in pub figure(figsize=((11.9,7.6))) plt.figure('Example 6 - Figure 8') DA76 = DA('H1_766.npz',load=1) DA76.extract(locals()) np.random.seed(0) # ensure the same result (useful for examples) gmm = mixture.GMM(n_components=16, covariance_type='spherical') m16 = gmm.fit(phases) # fit 16 Gaussians cids = m16.predict(phases) # assign each point to a cluster id for c in [7, 9]: # show the two most interesting clusters in freq vs k_h # select cluster members of sufficient amplitude, a12, and after 5ms w = np.where((c == cids) & (amp > 0.08) & (a12 > 0.5) & (t_mid > 0.005))[0] # add artificial noise to the k_h value to show points 'hidden' under others dither = .008 * np.random.random(len(w)) # colored by cluster plt.scatter(k_h[w] + dither, np.sqrt(ne_1[w])*freq[w], 700*amp[w], 'bgrcmkycmrgrcmykc'[c])
alldas = [] for ts in np.linspace(tstart, tstart + seg_len * dt, nfull, endpoint=False): results = LP.process_swept_Langmuir(t_range=[ts, tend], dtseg=seg_len, plot_DA=plot_DA, **kwargs) if hasattr(LP, 'da'): alldas.append(LP.da) # this one must start at a V maximum half_kwargs = dict(dtseg=seg_len // 2, **kwargs) fit_params = half_kwargs['fit_params'] cycavg_h = fit_params['cycavg'] if cycavg_h is not None and (seg_len // 2) < cycavg_h[0]: cycavg_h = [seg_len // 2, 1, cycavg_h[2]] fit_params.update(dict(cycavg=cycavg_h)) half_kwargs.update(dict(fit_params=fit_params)) for ts in np.linspace(tstart, tstart + seg_len * dt, nhalf, endpoint=False): results = LP.process_swept_Langmuir(t_range=(ts, tend), plot_DA=plot_DA, **half_kwargs) if hasattr(LP, 'da'): alldas.append(LP.da) bigda = DA(alldas[0].copyda()) for da in alldas[1:]: bigda.append(da)
cmap = cm.jet # see also cm.gray_r etc tm = localtime() hdr = str('PF2_{yy:02d}{mm:02d}{dd:02d}_'.format(yy=tm.tm_year - 2000, mm=tm.tm_mon, dd=tm.tm_mday, hh=tm.tm_hour)) # a local da wins, if not there then try DA_file try: da except: print('No da, check if there is a file') if DA_file is not None: try: da = DA(DA_file, load=1) except: print('DA_file {df} not found'.format(df=DA_file)) flucfiles = '{hdr}*{sht}*'.format(sht=sht, hdr=hdr) if not sht in da.da['shot']: print('shot {sht} not found, highest in {n} is {h}'.format( sht=sht, n=da.name, h=np.max(da.da['shot']))), # look for the flucfile remotely print(' acessing remote - may hang if sshfs') if len(glob('/h1svr2/tmp/' + flucfiles)) > 0: flucfiles = '/h1svr2/tmp/' + flucfiles remerge = 1 # print('using remote files') else: reprocess = 1
dafile=sys.argv[1] if len(sys.argv)>2: minpts=int(sys.argv[2]) if not(os.path.exists(dafile)): print('try LP/ folder') dafile = 'LP/' + dafile try: if da.name == dafile: print('NOT reloading file - run without -i to force reload') else: raise NameError except NameError: da = DA(dafile) areas = 'uncalibrated' if (len(da['Te'][0])<minpts): raise LookupError('fewer channels ({nc}) than minpts'. format(nc = len(da['Te'][0]))) try: if 'params' in da['info']: if da['info']['params']['pyfusion_version'] > '0.6.7b': areas = 'approximate' else: print('*** Warning - incorrect limiter numbers? ***') except: print('******* Really Old data file??***********')
"""Overlay Te from the lpf=99 fit on the per-channel Te plots of the lpf=9 fit."""
from pyfusion.data.DA_datamining import DA
# ylim/show were called as bare names, which exist only inside a pylab
# session; import pyplot so the script also runs stand-alone.
from matplotlib import pyplot as plt

da9 = DA('LP20160309_42_L57_errest_cov_lpf=9.npz')
da99 = DA('LP20160309_42_L57_errest_cov_lpf=99.npz')

# da9.plot returns a sequence of axes that is indexed by channel below
axs = da9.plot('Te', sharey='all')
for i in range(len(da99['info']['channels'])):
    # masked Te of the second data set on the matching channel's axes
    axs[i].plot(da99['t_mid'], da99.masked['Te'][:, i])

plt.ylim(0, 60)
plt.show()
""" A VERY small MP+HMP set, just shot 65139 - note that the copy and the extract() both duplicate data, which is good for development and debugging, but wasteful of space. """ from pyfusion.data.DA_datamining import DA from pyfusion.utils.utils import fix2pi_skips, modtwopi from pyfusion.visual.sp import sp from pyfusion.data.convenience import between, btw, bw, decimate, his, broaden _var_default = """ DAfilename='DA65MP2010HMPno612b5_M_N_fmax.npz' """ exec(_var_default) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) DA65 = DA(DAfilename, load=1) dd = DA65.copyda() DA65.extract(locals()) DA65.info() print('DA65', DAfilename)
""" A small MP+HMP set, just shot 65139 - note that the copy and the extract() both duplicate data, which is good for development and debugging, but wasteful of space. """ from pyfusion.data.DA_datamining import DA from pyfusion.utils.utils import fix2pi_skips, modtwopi from pyfusion.visual.sp import sp from pyfusion.data.convenience import between, bw, btw, decimate, his, broaden _var_default = """ DAfilename='300_small.npz' """ exec(_var_default) from pyfusion.utils import process_cmd_line_args exec(process_cmd_line_args()) DA300 = DA(DAfilename, load=1) dd = DA300.copyda() DA300.extract(locals()) DA300.info() print("DA300", DAfilename)
def write_DA(self, filename):
    """Pack the fit results into a DA, add a validity mask and ne18, and save.

    ``self.fitdata`` is converted to a float32 array and indexed as
    ``res[time, channel, field]``; the field indices are given by ``lookup``
    below.  Per-time-slice shot identifiers, bookkeeping in ``info``, a
    boolean ``mask`` and a density estimate ``ne18`` are added before the
    DA is written with ``da.save(filename)``.

    Args:
        filename: destination passed to ``DA.save``; its basename is also
            recorded in ``info['orig_name']``.
    """
    import getpass
    from pyfusion.data.DA_datamining import DA, Masked_DA

    dd = {}
    res = np.array(self.fitdata, dtype=np.float32)
    nt = len(res)
    nc = len(res[0])
    # channel names for the selected channels - used for info and for areas
    sel_chans = [self.i_chans[ic] for ic in self.select]

    # one integer per time slice; self.shot is used as (date, progId)
    for key in ['date', 'progId', 'shot']:
        dd[key] = np.zeros(nt, dtype=np.int64)

    dd['date'][:] = self.shot[0]
    dd['progId'][:] = self.shot[1]
    dd['shot'][:] = self.shot[1] + 1000*self.shot[0]

    # iteration counts are stored as uint16, so create them before the
    # generic float32 loop below (which only creates keys not yet present)
    for key in ['nits', 'maxits']:
        dd[key] = np.zeros([nt, nc], dtype=np.uint16)

    # make all the f32 arrays - note - ne is just I0 for now - fixed below
    lookup = [(0, 't_mid'), (1, 'Te'), (2, 'Vf'), (3, 'I0'),
              (4, 'resid'), (5, 'nits'), (6, 'maxits'), (7, 'Ie_Ii'),
              (3, 'ne18')]
    if self.fitter.fit_params.get('esterr', False):
        lookup.extend([(8, 'eTe'), (9, 'eVf'), (10, 'eI0')])

    for (ind, key) in lookup:
        if key not in dd:
            dd[key] = np.zeros([nt, nc], dtype=np.float32)
        dd[key][:] = res[:, :, ind]

    # fudge t_mid is not a vector...should fix properly
    dd['t_mid'] = dd['t_mid'][:, 0]

    # os.getlogin() raises OSError when there is no controlling terminal
    # (batch queue, cron, some IDEs); don't lose the results over that.
    try:
        username = os.getlogin()
    except OSError:
        username = getpass.getuser()

    dd['info'] = dict(
        params=self.actual_params,
        coords=[self.coords[ic] for ic in self.select],
        # area=[self.area[ic] for ic in self.select],  # needs to be in npz file etc first
        shotdata=dict(shot=[self.shot], utc_ns=[self.imeas.utc[0]]),
        # strip the device prefix and the '_I' marker from the channel names
        channels=[chn.replace(self.dev.name+'_', '').replace('_I', '')
                  for chn in sel_chans],
        orig_name=os.path.split(filename)[-1],
        username=username)

    da = DA(dd)
    da.masked = Masked_DA(['Te', 'I0', 'Vf', 'ne18', 'Ie_Ii'], baseDA=da)
    #  da.da['mask']=(da['resid']/abs(da['I0']) < .7) & (da['nits']<100)
    #  da.da['mask'] = ((da['resid']/abs(da['I0']) < .7) & (da['nits'] < da['maxits'])
    # from version 0.7.0 onwards, resid is already normed to I0
    lpf = self.fitter.actual_fparams['lpf']
    # Note: these multilines ('down to here') can be applied to a DA by
    #   pasting to reset mask AFTER uncommenting the following line
    # lpf = da['info']['params']['actual_fit_params']['lpf']
    rthr = 0.7  # LP20160309_29_L53__amoebaNone1.2N_2k.npz is < .12  others
    # None 0310_9 up to 0.7-0.8
    if lpf is not None:
        # residual threshold is scaled by sqrt(lpf/100) when lpf is set
        rthr = rthr * np.sqrt(lpf/100.0)
    da.da['mask'] = ((da['resid'] < rthr) & (da['nits'] < da['maxits'])
                     & (np.abs(da['Vf']) < 200) & (np.abs(da['Te']) < 200)
                     & (da['I0'] > 0.0004))
    # additional restriction applied if the error estimate is available
    if 'eTe' in da.da:  # want error not too big and smaller than temp
        da.da['mask'] &= ((np.abs(da['eTe']) < 100)
                          & (np.abs(da['eTe']) < np.abs(da['Te'])))
    # down to here

    qe = 1.602e-19
    mp = 1.67e-27
    # NOTE(review): looks like the Bohm ion-saturation-current relation with
    # Te in eV - confirm.
    fact = 1/(0.6*qe)*np.sqrt(self.amu*mp/(qe))/1e18     # units of 1e18

    # check if each channel has an area
    for (c, chn) in enumerate(sel_chans):
        cd = get_config_as_dict('Diagnostic', chn)
        A = cd.get('area', None)
        if A is None:
            A = 1.0e-6
            # NOTE(review): if pyfusion.logging is the stdlib logging module,
            # warn() is deprecated in favour of warning() - confirm and update.
            pyfusion.logging.warn('Defaulting area for {chn} to {A}'.format(chn=chn, A=A))
        A = float(A)
        da.da['ne18'][:, c] = fact/A * da['I0'][:, c]/np.sqrt(da['Te'][:, c])

    da.save(filename)
# The Initial paste %run pyfusion/examples/cluster_info.py figure();co.plot_clusters_phase_lines() figure();co.plot_kh_freq_all_clusters();ylim(0,80) from pyfusion.data.DA_datamining import DA, report_mem, append_to_DA_file DA_7677=DA('DA_7677.npz') DA_7677.extract(locals()) sht=76670 #86514 # 76670 # 672, 616 # typically paste from here down thr = 1.5 scl=1500 clim=None figure() boydsdata=1 if boydsdata: ph = -phases else: ph = phases w15=where((ml[15].one_rms(ph)<thr) & (shot==sht))[0];len(w15) w4=where((ml[4].one_rms(ph)<thr) & (shot==sht))[0];len(w4) w=union1d(w4,w15) if len(w)>0: scatter(t_mid[w],freq[w],scl*amp[w],c='b',label='n=5/m=4') w6=where((ml[6].one_rms(ph)<thr) & (shot==sht))[0];len(w6) w1=where((ml[1].one_rms(ph)<thr) & (shot==sht))[0];len(w1) w10=where((ml[10].one_rms(ph)<thr) & (shot==sht))[0];len(w10) w=union1d(w1,w10) if len(w)>0: scatter(t_mid[w],freq[w],scl*amp[w],c='g',label='n=4/m=3')
Using this mode, you could write more concisely plt.plot(H1.t_mid, H1.freq) Which is really nice and simple, unlike the messy iteration in this example. [row['t_mid'] for row in alldata] Queries would contain phrases like and_(H1.freq.between(4,20), H1.comment.line('%argon%') which are quite different to text mode, but easier to compose 'on the fly' """ from pyfusion.data.DA_datamining import DA from sqlalchemy import create_engine import matplotlib.pyplot as plt import os if not os.path.isfile('H1_766.sqlite'): # if not there, make it myDA = DA('H1_766.npz') myDA.to_sqlalchemy('sqlite:///H1_766.sqlite', newfmts=dict(phases='dphi_{i:02d}'), mytable='H1_766', chunk=5000, n_recs=1e9) engine = create_engine('sqlite:///H1_766.sqlite') conn = engine.connect() result = conn.execute('select shot, t_mid, freq, a12, amp from H1_766') alldata = result.fetchall() # all the instances, and overplot in feint colour plt.plot([row['t_mid'] for row in alldata], [row['freq'] for row in alldata],'o',alpha=0.02) plt.xlabel('time (s)') ; plt.ylabel('freq (kHz)') # Ex 3a. this time, select the large rotating modes, (note - we combine two lines here, # even though it makes it less readable) plt.figure() sel = conn.execute('select shot, t_mid, freq, a12, amp from H1_766 where amp> 0.05 and a12>0.7').fetchall() # for the selected data, plot freq against t_mid in red circles('r'), whose size reflects the amplitude