def getdata(self, *fargs, **features):
    """Return the requested features joined into one flat array.

    Features are requested positionally as single-item dictionaries
    (e.g. ``{'intervals': 48}``) and/or as keyword arguments
    (e.g. ``intervals=48``).  Positional requests are handled first, in
    the order given, followed by the keyword requests.

    Every feature is cached in ``self.extracted_feature`` under the key
    ``'<name>:<value>'``.  ``self.pfdfile`` is opened with
    ``pfddata(..., align=True)`` only if at least one requested feature is
    missing from that cache, and at most once per call.

    Returns:
        A flat numpy array holding the features in request order; an
        empty array when nothing is requested.
    """
    # list(...)[0] (rather than .items()[0]) and .items() (rather than
    # .iteritems()) behave the same on Python 2 and Python 3.
    requests = [list(item.items())[0] for item in fargs]
    requests.extend(features.items())

    pfd = None  # opened lazily, on the first cache miss
    data = np.array([])
    for key, value in requests:
        feature = '%s:%s' % (key, value)
        if feature in self.extracted_feature:
            newdata = self.extracted_feature[feature]
        else:
            if pfd is None:
                pfd = pfddata(self.pfdfile, align=True)
            newdata = pfd.getdata(**{key: value})
            self.extracted_feature[feature] = newdata
        # np.append flattens both operands, so the result stays 1-D.
        data = np.append(data, newdata)
    return data
def getdata(self, *fargs, **features):
    """Return the requested features joined into one flat array.

    Features are requested positionally as single-item dictionaries
    (e.g. ``{'intervals': 48}``) and/or as keyword arguments
    (e.g. ``intervals=48``).  Positional requests are handled first, in
    the order given, followed by the keyword requests.

    Every feature is cached in ``self.extracted_feature`` under the key
    ``'<name>:<value>'``.  The data source behind ``self.pfdfile`` is only
    opened if at least one requested feature is missing from that cache,
    and at most once per call.

    Returns:
        A flat numpy array holding the features in request order; an
        empty array when nothing is requested.

    Raises:
        Error: ``self.pfdfile`` is not a ``singlepulse`` object and its
            file extension is none of ``.pfd``, ``.ar2``, ``.ar``, ``.spd``
            (``Error`` is resolved from the enclosing module).
    """

    def open_source():
        # Choose the reader for self.pfdfile.  This used to be duplicated
        # verbatim in the positional and the keyword loop.
        source = self.pfdfile
        # An object whose class is exactly `singlepulse` is used as-is.
        # (A str can never satisfy this, so no separate str check is
        # needed.)
        if source.__class__ == singlepulse:
            return source
        extension = os.path.splitext(source)[1]
        if extension == '.pfd':
            return pfddata(source, align=True)
        if extension in ('.ar2', '.ar'):
            return ar2data(source, align=True)
        if extension == '.spd':
            return SPdata(source, align=True)
        # Single-argument print(...) emits the same text as the former
        # print statement and is valid on Python 2 and Python 3.
        print("unrecognized file format  %s" % (source,))
        raise Error

    # list(...)[0] (rather than .items()[0]) and .items() (rather than
    # .iteritems()) behave the same on Python 2 and Python 3.
    requests = [list(item.items())[0] for item in fargs]
    requests.extend(features.items())

    pfd = None  # opened lazily, on the first cache miss
    data = np.array([])
    for key, value in requests:
        feature = '%s:%s' % (key, value)
        if feature in self.extracted_feature:
            newdata = self.extracted_feature[feature]
        else:
            if pfd is None:
                pfd = open_source()
            newdata = pfd.getdata(**{key: value})
            self.extracted_feature[feature] = newdata
        # np.append flattens both operands, so the result stays 1-D.
        data = np.append(data, newdata)
    return data
def getdata(self, *fargs, **features):
    """Return the requested features joined into one flat array.

    Features are requested positionally as single-item dictionaries
    (e.g. ``{'intervals': 48}``) and/or as keyword arguments
    (e.g. ``intervals=48``).  Positional requests are handled first, in
    the order given, followed by the keyword requests.

    Every feature is cached in ``self.extracted_feature`` under the key
    ``'<name>:<value>'``.  The data source behind ``self.pfdfile`` is only
    opened if at least one requested feature is missing from that cache,
    and at most once per call.

    Returns:
        A flat numpy array holding the features in request order; an
        empty array when nothing is requested.

    Raises:
        Error: ``self.pfdfile`` is not a ``singlepulse`` object and its
            file extension is none of ``.pfd``, ``.ar2``, ``.ar``, ``.spd``
            (``Error`` is resolved from the enclosing module).
    """

    def open_source():
        # Choose the reader for self.pfdfile.  This used to be duplicated
        # verbatim in the positional and the keyword loop.
        source = self.pfdfile
        # An object whose class is exactly `singlepulse` is used as-is.
        # (A str can never satisfy this, so no separate str check is
        # needed.)
        if source.__class__ == singlepulse:
            return source
        extension = os.path.splitext(source)[1]
        if extension == '.pfd':
            return pfddata(source, align=True)
        if extension in ('.ar2', '.ar'):
            return ar2data(source, align=True)
        if extension == '.spd':
            return SPdata(source, align=True)
        # Single-argument print(...) emits the same text as the former
        # print statement and is valid on Python 2 and Python 3.
        print("unrecognized file format  %s" % (source,))
        raise Error

    # list(...)[0] (rather than .items()[0]) and .items() (rather than
    # .iteritems()) behave the same on Python 2 and Python 3.
    requests = [list(item.items())[0] for item in fargs]
    requests.extend(features.items())

    pfd = None  # opened lazily, on the first cache miss
    data = np.array([])
    for key, value in requests:
        feature = '%s:%s' % (key, value)
        if feature in self.extracted_feature:
            newdata = self.extracted_feature[feature]
        else:
            if pfd is None:
                pfd = open_source()
            newdata = pfd.getdata(**{key: value})
            self.extracted_feature[feature] = newdata
        # np.append flattens both operands, so the result stays 1-D.
        data = np.append(data, newdata)
    return data
import time

# Wall-clock reference taken before any file is touched.
t0 = time.time()

#pfd_files_pulsars = glob.glob('/beegfs/vishnu/scripts/neural_network/test/pulsars/*.pfd')
# Sorted so that every run slices the same files into the same segment.
pfd_files_nonpulsars = sorted(
    glob.glob('/beegfs/vishnu/scripts/neural_network/test/nonpulsars/*.pfd'))

# The file list is handled in `fraction` consecutive segments of
# `max_value` files each; this script loads the slice
# [max_value:double_max_value].
fraction = 4
current_segment = 2
# float() forces true division.  With Python 2 integer division the quotient
# was floored *before* math.ceil saw it, so up to `fraction - 1` trailing
# files fell outside every segment.
max_value = int(math.ceil(len(pfd_files_nonpulsars) / float(fraction)))
print(max_value)
double_max_value = 2 * max_value
print(double_max_value)

# Initialise data objects from getdata class
#data_object_pulsars = [pfddata(f) for f in pfd_files_pulsars]
data_object_nonpulsars = [
    pfddata(f) for f in pfd_files_nonpulsars[max_value:double_max_value]
]
print('loaded data into memory')

# Extract 4 features based on Zhu et.al 2014

#1 time vs phase plot
#time_phase_plots_pulsars = [f.getdata(intervals=48) for f in data_object_pulsars]
time_phase_plots_nonpulsars = [
    f.getdata(intervals=48) for f in data_object_nonpulsars
]
print('time phase done')

#2 freq vs phase plot
#freq_phase_plots_pulsars = [f.getdata(subbands=48) for f in data_object_pulsars]
freq_phase_plots_nonpulsars = [
    f.getdata(subbands=48) for f in data_object_nonpulsars
]
import sys
import os
import glob

# These path entries must be in place before the imports below them run.
sys.path.append(
    '/home/psr/software/psrchive/install/lib/python2.7/site-packages')
sys.path.append('/home/psr')

import numpy as np
from ubc_AI.training import pfddata

#pfd_files_pulsars = glob.glob('/beegfs/vishnu/scripts/neural_network/train/pulsars/*.pfd')
# glob.glob returns matches in arbitrary order; sorting makes the row order
# of the extracted features reproducible from run to run.
pfd_files_nonpulsars = sorted(glob.glob(
    '/beegfs/vishnu/scripts/neural_network/train/nonpulsars/*.pfd'))

# Initialise data objects from getdata class
#data_object_pulsars = [pfddata(f) for f in pfd_files_pulsars]
data_object_nonpulsars = [pfddata(f) for f in pfd_files_nonpulsars]

# Extract 4 features based on Zhu et.al 2014

#1 time vs phase plot
#time_phase_plots_pulsars = [f.getdata(intervals=48) for f in data_object_pulsars]
time_phase_plots_nonpulsars = [
    f.getdata(intervals=48) for f in data_object_nonpulsars
]

#2 freq vs phase plot
#freq_phase_plots_pulsars = [f.getdata(subbands=48) for f in data_object_pulsars]
freq_phase_plots_nonpulsars = [
    f.getdata(subbands=48) for f in data_object_nonpulsars
]

#3 Pulse Profile
#pulse_profile_pulsars = [f.getdata(phasebins=64) for f in data_object_pulsars]
# Wall-clock reference taken before any file is touched.
t0 = time.time()

#pfd_files_pulsars = glob.glob('/beegfs/vishnu/scripts/neural_network/test/pulsars/*.pfd')
# Sorted so that every run slices the same files into the same segment.
pfd_files_nonpulsars = sorted(
    glob.glob('/beegfs/vishnu/scripts/neural_network/test/nonpulsars/*.pfd'))

# The file list is handled in `fraction` consecutive segments of
# `max_value` files each; this script loads the slice
# [triple_max_value:quadruple_max_value].
fraction = 4
# NOTE(review): the slice taken below is the fourth segment, yet this still
# says 2 -- confirm whether anything later relies on this value.
current_segment = 2
# float() forces true division.  With Python 2 integer division the quotient
# was floored *before* math.ceil saw it, so up to `fraction - 1` trailing
# files fell outside every segment (including this last one).
max_value = int(math.ceil(len(pfd_files_nonpulsars) / float(fraction)))
double_max_value = 2 * max_value
triple_max_value = 3 * max_value
quadruple_max_value = 4 * max_value
print(quadruple_max_value)

# Initialise data objects from getdata class
#data_object_pulsars = [pfddata(f) for f in pfd_files_pulsars]
# quadruple_max_value may exceed the list length; slicing clips it.
data_object_nonpulsars = [
    pfddata(f)
    for f in pfd_files_nonpulsars[triple_max_value:quadruple_max_value]
]
print('loaded data into memory')

# Extract 4 features based on Zhu et.al 2014

#1 time vs phase plot
#time_phase_plots_pulsars = [f.getdata(intervals=48) for f in data_object_pulsars]
time_phase_plots_nonpulsars = [
    f.getdata(intervals=48) for f in data_object_nonpulsars
]
print('time phase done')

#2 freq vs phase plot
#freq_phase_plots_pulsars = [f.getdata(subbands=48) for f in data_object_pulsars]
freq_phase_plots_nonpulsars = [
    f.getdata(subbands=48) for f in data_object_nonpulsars
]
# for filename in fnmatch.filter(filenames, '*.pfd'):
# matches.append(os.path.join(root, filename))


def chunks(pfd_files, n):
    """Lazily produce consecutive slices of ``pfd_files``, ``n`` items each.

    The final slice is shorter when the length is not a multiple of ``n``.
    """
    total = len(pfd_files)
    for start in range(0, total, n):
        stop = start + n
        yield pfd_files[start:stop]


# Extract 4 features based on Zhu et.al 2014
if batch_size > 1:
    # Stays 0 when there are no files; otherwise ends as the number of
    # batches processed (batches are counted from 1).
    batch_number = 0
    for batch_number, value in enumerate(chunks(pfd_files, batch_size), 1):
        # Initialise data objects from getdata class
        data_object = [pfddata(filename) for filename in value]

        # In the four comprehensions below `filename` is bound to a pfddata
        # object from `data_object`, not to a path.
        time_phase_data = [
            filename.getdata(intervals=48) for filename in data_object
        ]
        freq_phase_data = [
            filename.getdata(subbands=48) for filename in data_object
        ]
        pulse_profile_data = [
            filename.getdata(phasebins=64) for filename in data_object
        ]
        dm_curve_data = [
            filename.getdata(DMbins=60) for filename in data_object
        ]

        ###Save all features as numpy array files