def differentiate(x, t, smooth_factor): n = np.shape(x)[0] xdot = np.zeros(n) for i in xrange(1,n): xdot[i] = (x[i]-x[i-1])/(t[i]-t[i-1]) xdot = signalsmooth.smooth(np.array(xdot),smooth_factor,'bartlett') return xdot
def smooth(self): # create array of training data arr_normal = np.array(self.trainSet[0].keys()[0], dtype=float) arr_anomaly = np.array(self.trainSet[0].keys()[0], dtype=float) for i in range(0, len(self.trainSet)): train_point = self.trainSet[i].keys()[0] label = self.trainSet[i][train_point] if label == 'Normal': arr_normal = np.vstack([arr_normal, np.array(train_point, dtype=float)]) elif label == 'Anomaly': arr_anomaly = np.vstack([arr_anomaly, np.array(train_point, dtype=float)]) else: sys.exit("Error in the training data!") # delete first row arr_normal = np.delete(arr_normal,(0),axis=0) arr_anomaly = np.delete(arr_anomaly,(0),axis=0) # do smoothing for each of columns for col in range(arr_normal.shape[1]): arr_normal[:,col] = smooth(arr_normal[:,col]) for col in range(arr_anomaly.shape[1]): arr_anomaly[:,col] = smooth(arr_anomaly[:,col]) return arr_normal, arr_anomaly
def get_med_dspec(mspath=None, msfile=None, specfile=None, \ smoothmode='base', tsmoothwidth=40, tsmoothtype='hanning', \ fsmoothwidth=4, fsmoothtype='hannning', \ fsmooth=1, timeran=None, chanran=None, dmin=0.5, dmax=2.0): # Open the ms and plot dynamic spectrum print 'retrieving spectral data...' ms.open(mspath + '/' + msfile) ms.selectinit(datadescid=0) if timeran and (type(timeran) == str): ms.msselect({'time': timeran}) if chanran and (type(chanran) == str): [bchan, echan] = chanran.split('~') nchan = int(echan) - int(bchan) + 1 ms.selectchannel(nchan, int(bchan), 1, 1) specdata = ms.getdata(['data', 'time', 'axis_info'], ifraxis=True) npol = specdata['data'].shape[0] nchan = specdata['data'].shape[1] nbl = specdata['data'].shape[2] ntime = specdata['data'].shape[3] print 'shape of the spectral data: ', specdata['data'].shape print 'number of time pixels: ', specdata['time'].shape[0] print 'number of frequency channels selected: ', specdata['axis_info'][ 'freq_axis']['chan_freq'].shape[0] print 'number of baselines: ', specdata['axis_info']['ifr_axis'][ 'baseline'].shape[0] # ratio over time-average if smoothmode == 'hipass': specsmooth = np.copy(specdata['data']) # time smooth for i in range(npol): for j in range(nchan): for k in range(nbl): specsmooth[i, j, k, :] = signalsmooth.smooth( specdata['data'][i, j, k, :], tsmoothwidth, tsmoothtype) # frequency smooth for i in range(npol): for j in range(nbl): for k in range(ntime): specsmooth[i, :, j, k] = signalsmooth.smooth( specsmooth[i, :, j, k], fsmoothwidth, fsmoothtype) specratio = specdata['data'] / specsmooth if smoothmode == 'base': specsmooth = np.mean(specdata['data'], axis=3) specratio = specdata['data'] / specsmooth[:, :, :, None] spec_med = np.median(specratio, axis=2) if fsmooth: for i in range(npol): for j in range(ntime): spec_med[i, :, j] = signalsmooth.smooth(spec_med[i, :, j], fsmoothwidth, fsmoothtype) tim = specdata['time'] freq = specdata['axis_info']['freq_axis']['chan_freq'].reshape(nchan) tim0 = tim[0] tim0str = qa.time(qa.quantity(tim0, 's'), prec=8)[0] tim_ = tim - tim[0] freqghz = freq / 1e9 f = plt.figure(figsize=(8, 8), dpi=100) ax1 = f.add_subplot(211) f.subplots_adjust(hspace=0.4) ax1.pcolormesh(tim_, freqghz, np.abs(spec_med[0, :, :]), cmap='jet', vmin=dmin, vmax=dmax) ax1.set_xlim([tim_[0], tim_[-1]]) ax1.set_ylim([freqghz[0], freqghz[-1]]) ax1.set_title('Median-Filtered Dynamic Spectrum (RCP)') ax1.set_xlabel('Time (seconds) since ' + tim0str) ax1.set_ylabel('Frequency (GHz)') ax2 = f.add_subplot(212) ax2.pcolormesh(tim_, freqghz, np.abs(spec_med[1, :, :]), cmap='jet', vmin=dmin, vmax=dmax) ax2.set_xlim([tim_[0], tim_[-1]]) ax2.set_ylim([freqghz[0], freqghz[-1]]) ax2.set_title('Median-Filtered Dynamic Spectrum (LCP)') ax2.set_xlabel('Time (seconds) since ' + tim0str) ax2.set_ylabel('Frequency (GHz)') if specfile: np.savez(mspath + '/' + specfile, spec_med=spec_med, tim=tim, freq=freq)
def subvs2(vis=None, outputvis=None, timerange='', spw='', mode=None, subtime1=None, subtime2=None, smoothaxis=None, smoothtype=None, smoothwidth=None, splitsel=None, reverse=None, overwrite=None): """Perform vector subtraction for visibilities Keyword arguments: vis -- Name of input visibility file (MS) default: none; example: vis='ngc5921.ms' outputvis -- Name of output uv-subtracted visibility file (MS) default: none; example: outputvis='ngc5921_src.ms' timerange -- Time range of performing the UV subtraction: default='' means all times. examples: timerange = 'YYYY/MM/DD/hh:mm:ss~YYYY/MM/DD/hh:mm:ss' timerange = 'hh:mm:ss~hh:mm:ss' spw -- Select spectral window/channel. default = '' all the spectral channels. Example: spw='0:1~20' mode -- operation mode default 'linear' mode = 'linear': use a linear fit for the background to be subtracted mode = 'lowpass': act as a lowpass filter---smooth the data using different smooth types and window sizes. Can be performed along either time or frequency axis mode = 'highpass': act as a highpass filter---smooth the data first, and subtract the smoothed data from the original. Can be performed along either time or frequency axis mode = 'linear' expandable parameters: subtime1 -- Time range 1 of the background to be subtracted from the data default='' means all times. format: timerange = 'YYYY/MM/DD/hh:mm:ss~YYYY/MM/DD/hh:mm:ss' timerange = 'hh:mm:ss~hh:mm:ss' subtime2 -- Time range 2 of the backgroud to be subtracted from the data default='' means all times. examples: timerange = 'YYYY/MM/DD/hh:mm:ss~YYYY/MM/DD/hh:mm:ss' timerange = 'hh:mm:ss~hh:mm:ss' mode = 'lowpass' or 'highpass' expandable parameters: smoothaxis -- axis of smooth Default: 'time' smoothaxis = 'time': smooth is along the time axis smoothaxis = 'freq': smooth is along the frequency axis smoothtype -- type of the smooth depending on the convolving kernel Default: 'flat' smoothtype = 'flat': convolving kernel is a flat rectangle, equivalent to a boxcar moving smooth smoothtype = 'hanning': Hanning smooth kernel. See numpy.hanning smoothtype = 'hamming': Hamming smooth kernel. See numpy.hamming smoothtype = 'bartlett': Bartlett smooth kernel. See numpy.bartlett smoothtype = 'blackman': Blackman smooth kernel. See numpy.blackman smoothwidth -- width of the smooth kernel Default: 5 Examples: smoothwidth=5, meaning the width is 5 pixels splitsel -- True or False. default = False. If splitsel = False, then the entire input measurement set is copied as the output measurement set (outputvis), with background subtracted at selected timerange and spectral channels. If splitsel = True,then only the selected timerange and spectral channels are copied into the output measurement set (outputvis). reverse -- True or False. default = False. If reverse = False, then the times indicated by subtime1 and/or subtime2 are treated as background and subtracted; If reverse = True, then reverse the sign of the background-subtracted data. The option can be used for mapping absorptive structure. overwrite -- True or False. default = False. If overwrite = True and outputvis already exists, the selected subtime and spw in the output measurment set will be replaced with background subtracted visibilities """ # check the visbility ms casalog.post('input parameters:') casalog.post('vis: ' + vis) casalog.post('outputvis: ' + outputvis) casalog.post('smoothaxis: ' + smoothaxis) casalog.post('smoothtype: ' + smoothtype) casalog.post('smoothwidth: ' + str(smoothwidth)) if not outputvis or outputvis.isspace(): raise (ValueError, 'Please specify outputvis') if os.path.exists(outputvis): if overwrite: print( "The already existing output measurement set will be updated.") else: raise (ValueError, "Output MS %s already exists - will not overwrite." % outputvis) else: if not splitsel: shutil.copytree(vis, outputvis) else: ms.open(vis, nomodify=True) ms.split(outputvis, spw=spw, time=timerange, whichcol='DATA') ms.close() if timerange and (type(timerange) == str): [btimeo, etimeo] = timerange.split('~') btimeosec = qa.getvalue(qa.convert(qa.totime(btimeo), 's')) etimeosec = qa.getvalue(qa.convert(qa.totime(etimeo), 's')) timebinosec = etimeosec - btimeosec if timebinosec < 0: raise Exception( 'Negative timebin! Please check the "timerange" parameter.') casalog.post('Selected timerange: ' + timerange + ' as the time for UV subtraction.') else: casalog.post( 'Output timerange not specified, using the entire timerange') if spw and (type(spw) == str): spwlist = spw.split(';') else: casalog.post('spw not specified, use all frequency channels') # read the output data datams = mstool() datams.open(outputvis, nomodify=False) datamsmd = msmdtool() datamsmd.open(outputvis) spwinfod = datams.getspectralwindowinfo() spwinfok = spwinfod.keys() spwinfok.sort(key=int) spwinfol = [spwinfod[k] for k in spwinfok] for s, spi in enumerate(spwinfol): print('processing spectral window {}'.format(spi['SpectralWindowId'])) datams.selectinit(reset=True) staql = {'time': '', 'spw': ''} if not splitsel: # outputvis is identical to input visibility, do the selection if timerange and (type(timerange == str)): staql['time'] = timerange if spw and (type(spw) == str): staql['spw'] = spwlist[s] if not spw and not timerange: # data selection is not made print('selecting all spws and times') staql['spw'] = str(spi['SpectralWindowId']) else: # outputvis is splitted, selections have already applied, select all the data print('split the selected spws and times') staql['spw'] = str(spi['SpectralWindowId']) datams.msselect(staql) orec = datams.getdata(['data', 'time', 'axis_info'], ifraxis=True) npol, nchan, nbl, ntim = orec['data'].shape print('dimension of output data', orec['data'].shape) casalog.post('Number of baselines: ' + str(nbl)) casalog.post('Number of spectral channels: ' + str(nchan)) casalog.post('Number of time pixels: ' + str(ntim)) try: if mode == 'linear': # define and check the background time ranges if subtime1 and (type(subtime1) == str): [bsubtime1, esubtime1] = subtime1.split('~') bsubtime1sec = qa.getvalue( qa.convert(qa.totime(bsubtime1), 's')) esubtime1sec = qa.getvalue( qa.convert(qa.totime(esubtime1), 's')) timebin1sec = esubtime1sec - bsubtime1sec if timebin1sec < 0: raise Exception( 'Negative timebin! Please check the "subtime1" parameter.' ) casalog.post('Selected timerange 1: ' + subtime1 + ' as background for uv subtraction.') else: raise Exception( 'Please enter at least one timerange as the background' ) if subtime2 and (type(subtime2) == str): [bsubtime2, esubtime2] = subtime2.split('~') bsubtime2sec = qa.getvalue( qa.convert(qa.totime(bsubtime2), 's')) esubtime2sec = qa.getvalue( qa.convert(qa.totime(esubtime2), 's')) timebin2sec = esubtime2sec - bsubtime2sec if timebin2sec < 0: raise Exception( 'Negative timebin! Please check the "subtime2" parameter.' ) timebin2 = str(timebin2sec) + 's' casalog.post('Selected timerange 2: ' + subtime2 + ' as background for uv subtraction.') # plus 1s is to ensure averaging over the entire timerange else: casalog.post( 'Timerange 2 not selected, using only timerange 1 as background' ) # Select the background indicated by subtime1 ms.open(vis, nomodify=True) # Select the spw id # ms.msselect({'time': subtime1}) staql0 = {'time': subtime1, 'spw': ''} if spw and (type(spw) == str): staql0['spw'] = spwlist[s] else: staql0['spw'] = staql['spw'] ms.msselect(staql0) rec1 = ms.getdata(['data', 'time', 'axis_info'], ifraxis=True) # print('shape of the frequency matrix ',rec1['axis_info']['freq_axis']['chan_freq'].shape) sz1 = rec1['data'].shape print('dimension of selected background 1', rec1['data'].shape) # the data shape is (n_pol,n_channel,n_baseline,n_time), no need to reshape # rec1['data']=rec1['data'].reshape(sz1[0],sz1[1],sz1[2],nspw,sz1[3]/nspw,order='F') # print('reshaped rec1 ', rec1['data'].shape) rec1avg = np.average(rec1['data'], axis=3) casalog.post('Averaging the visibilities in subtime1: ' + subtime1) ms.close() if subtime2 and (type(subtime2) == str): ms.open(vis, nomodify=True) # Select the spw id staql0 = {'time': subtime2, 'spw': ''} if spw and (type(spw) == str): staql0['spw'] = spwlist[s] else: staql0['spw'] = staql['spw'] ms.msselect(staql0) rec2 = ms.getdata(['data', 'time', 'axis_info'], ifraxis=True) sz2 = rec2['data'].shape print('dimension of selected background 2', rec2['data'].shape) # rec2['data']=rec2['data'].reshape(sz2[0],sz2[1],sz2[2],nspw,sz2[3]/nspw,order='F') # print('reshaped rec1 ', rec2['data'].shape) rec2avg = np.average(rec2['data'], axis=3) ms.close() casalog.post('Averaged the visibilities in subtime2: ' + subtime2) if subtime1 and (not subtime2): casalog.post( 'Only "subtime1" is defined, subtracting background defined in subtime1: ' + subtime1) t1 = (np.amax(rec1['time']) + np.amin(rec1['time'])) / 2. print('t1: ', qa.time(qa.quantity(t1, 's'), form='ymd', prec=10)) for i in range(ntim): orec['data'][:, :, :, i] -= rec1avg if reverse: orec['data'][:, :, :, i] = -orec['data'][:, :, :, i] if subtime1 and subtime2 and (type(subtime2) == str): casalog.post( 'Both subtime1 and subtime2 are specified, doing linear interpolation between "subtime1" and "subtime2"' ) t1 = (np.amax(rec1['time']) + np.amin(rec1['time'])) / 2. t2 = (np.amax(rec2['time']) + np.amin(rec2['time'])) / 2. touts = orec['time'] print('t1: ', qa.time(qa.quantity(t1, 's'), form='ymd', prec=10)) print('t2: ', qa.time(qa.quantity(t2, 's'), form='ymd', prec=10)) for i in range(ntim): tout = touts[i] if tout > np.amax([t1, t2]): tout = np.amax([t1, t2]) elif tout < np.amin([t1, t2]): tout = np.amin([t1, t2]) orec['data'][:, :, :, i] -= (rec2avg - rec1avg) * ( tout - t1) / (t2 - t1) + rec1avg if reverse: orec['data'][:, :, :, i] = -orec['data'][:, :, :, i] elif mode == 'highpass': if smoothtype != 'flat' and smoothtype != 'hanning' and smoothtype != 'hamming' and smoothtype != 'bartlett' and smoothtype != 'blackman': raise Exception('Unknown smoothtype ' + str(smoothtype)) if smoothaxis == 'time': if smoothwidth <= 0 or smoothwidth >= ntim: raise Exception( 'Specified smooth width is <=0 or >= the total number of ' + smoothaxis) else: for i in range(orec['data'].shape[0]): for j in range(nchan): for k in range(nbl): orec['data'][i, j, k, :] -= signalsmooth.smooth( orec['data'][i, j, k, :], smoothwidth, smoothtype) if smoothaxis == 'freq': if smoothwidth <= 0 or smoothwidth >= nchan: raise Exception( 'Specified smooth width is <=0 or >= the total number of ' + smoothaxis) else: for i in range(orec['data'].shape[0]): for j in range(nbl): for k in range(ntim): orec['data'][i, :, j, k] -= signalsmooth.smooth( orec['data'][i, :, j, k], smoothwidth, smoothtype) elif mode == 'lowpass': if smoothtype != 'flat' and smoothtype != 'hanning' and smoothtype != 'hamming' and smoothtype != 'bartlett' and smoothtype != 'blackman': raise Exception('Unknown smoothtype ' + str(smoothtype)) if smoothaxis == 'time': if smoothwidth <= 0 or smoothwidth >= ntim: raise Exception( 'Specified smooth width is <=0 or >= the total number of ' + smoothaxis) else: for i in range(orec['data'].shape[0]): for j in range(nchan): for k in range(nbl): orec['data'][i, j, k, :] = signalsmooth.smooth( orec['data'][i, j, k, :], smoothwidth, smoothtype) if smoothaxis == 'freq': if smoothwidth <= 0 or smoothwidth >= nchan: raise Exception( 'Specified smooth width is <=0 or >= the total number of ' + smoothaxis) else: for i in range(orec['data'].shape[0]): for j in range(nbl): for k in range(ntim): orec['data'][i, :, j, k] = signalsmooth.smooth( orec['data'][i, :, j, k], smoothwidth, smoothtype) else: raise Exception('Unknown mode' + str(mode)) except Exception as instance: print('*** Error ***', instance) # orec['data']=orec['data'].reshape(szo[0],szo[1],szo[2],szo[3],order='F') # put the modified data back into the output visibility set del orec['time'] del orec['axis_info'] # ms.open(outputvis,nomodify=False) # if not splitsel: # outputvis is identical to input visibility, do the selection # if timerange and (type(timerange==str)): # datams.msselect({'time':timerange}) # if spw and (type(spw)==str): # datams.selectinit(datadescid=int(spwid)) # nchan=int(echan)-int(bchan)+1 # datams.selectchannel(nchan,int(bchan),1,1) # if not spw and not timerange: # data selection is not made # datams.selectinit(datadescid=0) # else: # outputvis is splitted, selections have already applied, select all the data # datams.selectinit(datadescid=0) datams.putdata(orec) datams.close() datamsmd.done()
neighbors = [] # set the k value in KNN algorithm based on the size of training set #self.k = min(self.size_normal_train, self.size_anomaly_train) self.k = 5 for i in range(self.k): neighbors.append((distances[i][0], distances[i][1])) return neighbors """ Get the label of the test_point by kNN @param test_point: the tuple (vector) being tested """ def getLabel(self, test_point): neighbors = self.getNeighbors(test_point) votes = {} for i in range(len(neighbors)): label = neighbors[i][1] votes[label] = 1 + votes.get(label, 0) sortedVotes = sorted(votes.iteritems(), key=operator.itemgetter(1), reverse=True) return sortedVotes[0][0] if __name__ == "__main__": knn_class = KNN() print knn_class.feature_scaling((8000,-2,2,6000)) print knn_class.feature_scaling((8000,0,0,6000)) print knn_class.Euclidean_distance((0,0),(3,4)) arr = np.array([[1,2,3],[1,3,4],[2,4,3],[3,2,3]], dtype=float) print arr arr[:,0] = smooth(arr[:,0]) print arr