Example #1
 def getRAandDec(self, fitsFile, catalog):
     self._debug("\tRunning xy2sky")
     tempDir = self.tempDir + os.sep + "xy2sky"
     self._debug("\tGenerating temp directory at %s" % tempDir)
     
     try:
         shutil.rmtree(tempDir)  # delete directory
     except OSError as exc:
         if exc.errno != errno.ENOENT:
             raise  # re-raise exception
     os.mkdir(tempDir)
     
     imfile = tempDir + os.sep + "imfile.txt"
     np.savetxt(imfile, catalog[['X_IMAGE','Y_IMAGE']], fmt="%0.3f")
     outfile = tempDir + os.sep + "skys.txt"
     # | cut -d \" \" -f 1-2 > %s
     commandline = wcsToolsPath + "/xy2sky -d %s @%s | awk '{print $1,$2}'> %s" % (fitsFile, imfile, outfile)
     p = subprocess.Popen(["/bin/bash", "-i", "-c", commandline], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
     output = p.communicate() #now wait
     
     res = np.loadtxt(outfile)
     catalog = append_fields(catalog, 'RA', res[:,0], usemask=False)    
     catalog = append_fields(catalog, 'DEC', res[:,1], usemask=False)    
     
     return catalog
Example #2
    def __init__(self,d={}):
# time: timestep number
        self.time=d.get('time',0)
# box: numpy array of lattice vectors
        box = d.get('box',N.zeros((3,3)))
        self.box = box
        if box.shape == (3,3): self.vc = box
        elif box.shape == (3,): self.vc = N.diag(box)
        else:                  raise ValueError('Box should be (3,3) or (3,) array')
# atoms: atoms numpy array
        atoms = d.get('atoms',[])
        
        leg_list = atoms.dtype.names

        if not 'id' in leg_list:       # add 'id' column
            atoms = nlrf.append_fields(atoms, 'id', N.arange(len(atoms))+1, asrecarray=True, usemask=False)
   
        if not 'itype' in leg_list:    # add 'itype' column
            if 'label' in leg_list:
                labels = list(N.unique(atoms['label']))
                labels = dict(zip(labels, range(1,len(labels)+1)))
                ityp = N.array([labels[atom['label']] for atom in atoms])
                atoms = nlrf.append_fields(atoms, 'itype', ityp, asrecarray=True, usemask=False)
            else:   
                atoms = nlrf.append_fields(atoms, 'itype', N.ones(len(atoms)), asrecarray=True, usemask=False)

        self.atoms = atoms
Example #3
 def load_training_data(self, training_data='', **extras):
     """Read an HDF5 file with `parameters` a structured ndarray and
     `spectra` an ndarray.  Convert to a structured array of labels of
     length `ntrain` with `nlabel` fields, and an ndarray of training
     spectra of shape (nwave, ntrain).
     """
     self.has_errors = False
     with h5py.File(training_data, "r") as f:
         self.library_spectra = f['spectra'][:]
         self.library_labels = f['parameters'][:]
         self.wavelengths = f['wavelengths'][:]
         try:
             self.library_snr = self.library_spectra / f['uncertainty'][:]
             self.has_errors = True
         except KeyError:
             # no 'uncertainty' dataset in the file
             pass
         ancillary = f['ancillary'][:]
     # add and rename labels here.  Note that not all labels need to be or
     # will be used in the feature generation
     newfield = ['logt', 'miles_id']
     newdata = [np.log10(self.library_labels['teff']), ancillary['miles_id']]
     self.library_labels = rfn.append_fields(self.library_labels,
                                              newfield, newdata, usemask=False)
     try:
         # assuming f_nu
         fbol = np.trapz(self.library_spectra / self.wavelengths**2, self.wavelengths)
         newfield = ['logl', 'luminosity', 'fbol']
         newdata = [ancillary['logl'], 10**ancillary['logl'], fbol]
         self.library_labels = rfn.append_fields(self.library_labels,
                                                  newfield, newdata, usemask=False)            
     except KeyError:
         # 'logl' not available in the ancillary data
         pass
     self.reset_mask()
Example #4
def append_fields(base, names, data, dtypes=None, fill_value=-1,
                  usemask=False,   # Different from recfunctions default
                  asrecarray=False):
    """Append fields to numpy structured array
    If fields already exists in data, will overwrite
    """
    if isinstance(names, (tuple, list)):
        # Add multiple fields at once
        if dtypes is None:
            dtypes = [d.dtype for d in data]
        # Convert to numpy arrays so we can use boolean index arrays
        names = np.array(names)
        data = np.array(data)
        dtypes = np.array(dtypes)
        not_yet_in_data = ~np.in1d(names, base.dtype.names)
        # Append the fields that were not in the data
        base = recfunctions.append_fields(base,
                                          names[not_yet_in_data].tolist(),
                                          data[not_yet_in_data].tolist(),
                                          dtypes[not_yet_in_data].tolist(),
                                          fill_value, usemask, asrecarray)
        # Overwrite the fields that are already in the data
        for i in np.where(~not_yet_in_data)[0]:
            base[names[i]] = data[i]
        return base
    else:
        # Add single field
        if names in base.dtype.names:
            # Field already exists: overwrite data
            base[names] = data
            return base
        else:
            return recfunctions.append_fields(base, names, data, dtypes,
                                              fill_value, usemask, asrecarray)
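A minimal usage sketch for the wrapper above (hypothetical data; assumes the wrapper is in scope as `append_fields` alongside numpy): the existing 'x' field is overwritten in place, while 'y' is appended as a new column.

import numpy as np

base = np.array([(1, 2.0), (2, 4.0)], dtype=[('id', 'i4'), ('x', 'f8')])
# 'x' already exists, so it is overwritten; 'y' is new, so it is appended
out = append_fields(base, ['x', 'y'], [np.array([9.0, 9.0]), np.array([5.0, 6.0])])
print(out.dtype.names)  # ('id', 'x', 'y')
print(out['x'])         # [9. 9.]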
Example #5
def get_cat(cat_filename, cut=0):
  '''
  Opens the target file using numpy genfromtxt. Assumes the format is given by
  `thehead`. If coordinates are sexagesimal, they are converted to degrees and
  two new columns ('degra' and 'degdec') are appended to the record array.
  Even if coordinates are already in degrees, the 'degra' and 'degdec' columns
  are appended, but they will be identical to the original 'ra' and 'dec'.
  '''

  f = open(cat_filename,'r')
  header = f.readline().lower().split()
  f.close()

  thehead = ['hstid', 'field', 'ra', 'dec', 'v', 'verr', 'bvcol', 'bvcolerr', 'vicol', 'vicolerr']
  dtype = ['S30', 'S30', 'S30', 'S30', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8']
  
  if header == thehead:
    data = np.genfromtxt(cat_filename, names=header, dtype=dtype, skip_header=1, skip_footer=cut)
  
    if ':' in data['ra'][0]:
      ra_degrees= np.array([sex2deg(x) for x in data['ra']])
      dec_degrees = np.array([sex2deg(x, RA=False) for x in data['dec']])      
      
      cat = rfn.append_fields(data, names=['degra','degdec'], data=[ra_degrees, dec_degrees], 
                              dtypes=['f8','f8'], usemask=False)
    
    else:
      cat = rfn.append_fields(data, names=['degra','degdec'], data=[data['ra'], data['dec']], 
                              dtypes=['f8','f8'], usemask=False)
    
    return cat
  
  else:
    sys.exit('Columns need to be titled: %s' % thehead)
Example #6
 def readH5(self, fname):
     """ 
     Reads Catalog from H5 file, specified as argument
     """
     of=h5py.File(fname, "r")
     self.data=of["objects"][()]
     self.meta=of["meta"].attrs
     if "dNdz" in of.keys():
         self.dNdz=of['dNdz'][()]
     if "bz" in of.keys():
         self.bz=of['bz'][()]
     self.window=window.readWindowH5(of['window'])
     self.photoz=photoz.readPhotoZH5(of['photoz'])
     cversion=float(self.meta['version'])
     if cversion==0.1:
         print("updating 0.1 to version ", self.version)
         self.data=recfunctions.append_fields(self.data,'sigma_pz',(1+self.data["z_real_t"])*self.data["z_error"],
                                              usemask=False)
         self.data=recfunctions.append_fields(self.data,'z',self.data["z_real_t"]+(1+self.data["z_real_t"])*self.data["z_error"],
                                              usemask=False)
         self.data=self.data[ [ name for name in self.data.dtype.names if name not in ["z_real_t", "z_rsd_t","z_error"] ] ]
     if cversion==0.2:
         print("WARNING: upgrading from 0.2 to 0.3, photozs internally slightly inconsistent.")
         self.data=recfunctions.append_fields(self.data,'sigma_pz',(1+self.data["z"])*self.photoz.sigma,
                                              usemask=False)
Example #7
def get_demodulated_data_from_list(filelist,freq=10,supply_index=True,phase_offset=0):
    filelist.sort() #just in case
    dd=[]
    for f in filelist:
        #only use full size files
        stats=os.stat(f)
        if stats.st_size == 10752000:
            print(f)
            d=demod.demodulate_dat(f,freq,supply_index=True,phase_offset=phase_offset)
            #filename is start of data taking (I think) and we'll just add 1/samprate seconds per rev
            h=np.float64(f[-12:-10])
            m=np.float64(f[-10:-8])
            s=np.float64(f[-8:-6])
            t=h+m/60.+(s+(d['rev']-d['rev'][0])/samprate)/3600.
            d=recf.append_fields(d,'localtime',t)
            ut=np.mod(t+7.,24.)
            if len(f)>21:
                y=np.zeros(len(d),dtype=int)+int(f[-21:-17])
                mo=np.zeros(len(d),dtype=int)+int(f[-17:-15])
                dy=np.zeros(len(d),dtype=int)+int(f[-15:-13])
                ut=np.mod(t+7.,24.)
                utt=t+7.
                dy[utt>ut]=dy[utt>ut]+1
                d=recf.append_fields(d,['year','month','day'],[y,mo,dy])
            d=recf.append_fields(d,'ut',ut)
            dd.append(d)
    return np.concatenate(dd)
Example #8
File: io.py Project: robbisg/py_eye
def merge_paradigm(trial_info, paradigm, behavioural=None, **conf):
    
    
    baseline_condition = ''
    for arg in conf:
        if arg == 'baseline':
            baseline_condition = conf[arg]

    mask_blink_outlier = np.in1d(paradigm['Trial'], trial_info['Trial'])
        
    trial_info = nprec.append_fields(trial_info, 
                                     'Label', 
                                     paradigm['Label'][mask_blink_outlier]).data

    
    mask_task = paradigm['Label'] != baseline_condition
        
    
    print('Trials no.' + str(len(trial_info)))

    if behavioural is not None:
        m = mask_task * mask_blink_outlier
        m = m[1::2]
    
        trial_task_info = trial_info[trial_info['Label'] != baseline_condition]
    
        trial_cond = nprec.append_fields(trial_task_info,
                                     behavioural.dtype.names, 
                                     [behavioural[b][m] for b in behavioural.dtype.names]).data
        
        return trial_cond, trial_info
    
    else:
        
        return trial_info
Example #9
def whichGalaxyProfile(sdss):

    exp_L = np.exp(np.array([sdss['LNLEXP_G'],sdss['LNLEXP_R'],sdss['LNLEXP_I'],sdss['LNLEXP_Z']])).T
    dev_L = np.exp(np.array([sdss['LNLDEV_G'],sdss['LNLDEV_R'],sdss['LNLDEV_I'],sdss['LNLDEV_Z']])).T
    star_L = np.exp(np.array([sdss['LNLSTAR_G'],sdss['LNLSTAR_R'],sdss['LNLSTAR_I'],sdss['LNLSTAR_Z']])).T

    expfracL = exp_L /(exp_L + dev_L + star_L)
    devfracL = dev_L /(exp_L + dev_L + star_L)
    
    modelmode = np.zeros((len(sdss), 4), dtype=np.int32)

    expmodel = (expfracL > 0.5)
    modelmode[expmodel] = 0
    devmodel = (devfracL > 0.5)
    modelmode[devmodel] = 1
    neither = ~(expmodel | devmodel)
    modelmode[neither] = 2
    
    sdss = rf.append_fields(sdss, 'BESTPROF_G', modelmode[:,0])
    sdss = rf.append_fields(sdss, 'BESTPROF_R', modelmode[:,1])
    sdss = rf.append_fields(sdss, 'BESTPROF_I', modelmode[:,2])
    sdss = rf.append_fields(sdss, 'BESTPROF_Z', modelmode[:,3])
    
    #print ' exp :', np.sum(expmodel),' dev :', np.sum(devmodel), 'neither :', np.sum(neither)
    return sdss
Example #10
    def getClassifiers(self):
        if not os.path.exists(self.outDir):
            os.mkdir(self.outDir)
        outDir = self.outDir + os.sep + "classPickle"
        if not os.path.exists(outDir):
            os.mkdir(outDir)
        class1Save = outDir + os.sep + "classifier1.pkl"
        class2Save = outDir + os.sep + "classifier2.pkl"
        
        class1Exists = os.path.exists(class1Save)
        class2Exists = os.path.exists(class2Save)

        if not (class1Exists and class2Exists):
            self._setupTempDir()
            self.fitsFiles = [f[:-5] for f in os.listdir(self.fitsFolder) if ".fits" in f]
            self.fitsFilesLoc = [os.path.abspath(self.fitsFolder + os.sep + f) for f in os.listdir(self.fitsFolder) if ".fits" in f]
            
            for f in self.fitsFiles:
                self.mainCatalog[f] = self.getCatalog(self.fitsFolder + os.sep + f + ".fits", ishape=True)
                self.candidateMask[f] = self._getCandidateMask(self.mainCatalog[f], np.loadtxt(self.fitsFolder + os.sep + f + ".txt"))
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'WEIGHT', self.candidateMask[f] * 1.0, usemask=False)    
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'EXTENDED', self.candidateMask[f], usemask=False)    
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'HLR', np.zeros(self.mainCatalog[f].shape), usemask=False)    
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'MAG', np.zeros(self.mainCatalog[f].shape), usemask=False)
            self._trainClassifier()
            joblib.dump(self.sc, class1Save) 
            joblib.dump(self.sc2, class2Save) 
        else:
            self.sc = joblib.load(class1Save)
            self.sc2 = joblib.load(class2Save)
            

        #self._testClassifier(catalog, candidateMask)
        #self._cleanTempDir()
        self._debug("Classifier generated. Now you can invoke .clasify(catalog)")
Example #11
    def analyze_chamber_data(self,raw_chamber_data):
        ethanol_data = raw_chamber_data[raw_chamber_data['status']=='Ethanol']
        analyzed_ethanol_data = self.analyze_data(ethanol_data)
        status_array = numpy.array(['Ethanol']*len(analyzed_ethanol_data),dtype='|S25')
        analyzed_chamber_data = recfunctions.append_fields(analyzed_ethanol_data,
                                                           'status',
                                                           status_array,
                                                           dtypes='|S25',
                                                           usemask=False)

        air_before_data = raw_chamber_data[raw_chamber_data['status']=='AirBefore']
        if air_before_data.size != 0:
            analyzed_air_before_data = self.analyze_data(air_before_data)
            status_array = numpy.array(['AirBefore']*len(analyzed_air_before_data),dtype='|S25')
            analyzed_air_before_data = recfunctions.append_fields(analyzed_air_before_data,
                                                                  'status',
                                                                  status_array,
                                                                  dtypes='|S25',
                                                                  usemask=False)
            analyzed_chamber_data = recfunctions.stack_arrays((analyzed_air_before_data,analyzed_chamber_data),usemask=False)


        air_after_data = raw_chamber_data[raw_chamber_data['status']=='AirAfter']
        if air_after_data.size != 0:
            analyzed_air_after_data = self.analyze_data(air_after_data)
            status_array = numpy.array(['AirAfter']*len(analyzed_air_after_data),dtype='|S25')
            analyzed_air_after_data = recfunctions.append_fields(analyzed_air_after_data,
                                                                  'status',
                                                                  status_array,
                                                                  dtypes='|S25',
                                                                  usemask=False)
            analyzed_chamber_data = recfunctions.stack_arrays((analyzed_chamber_data,analyzed_air_after_data),usemask=False)

        return analyzed_chamber_data
Example #12
 def training_split(self, holdout_unit='none', holdout_prop=.2):
     ''' Splits the data up into test and train subsets '''
     if holdout_prop > .99 or holdout_prop < .01:
         raise ValueError('The holdout proportion must be between .01 and .99.')
     if holdout_unit == 'none':
         self.training_data = self.observation_matrix
         self.test_data = self.prediction_matrix
         self.training_type = 'make predictions'
         print('Fitting model to all data')
     elif holdout_unit == 'datapoint':
         holdouts = np.random.binomial(1, holdout_prop, self.data_rows)
         self.training_data = np.delete(self.observation_matrix, np.where(holdouts==1)[0], axis=0)
         self.test_data = np.delete(self.observation_matrix, np.where(holdouts==0)[0], axis=0)
         self.training_type = 'datapoint'
         print('Fitting model to ' + str((1-holdout_prop)*100) + '% of datapoints')
     elif holdout_unit == 'country-year':
         country_years = [self.observation_matrix.country[i] + '_' + str(self.observation_matrix.year[i]) for i in range(self.data_rows)]
         data_flagged = recfunctions.append_fields(self.observation_matrix, 'holdout', np.zeros(self.data_rows)).view(np.recarray)
         for i in np.unique(country_years):
             data_flagged.holdout[np.where(data_flagged.country + '_' + data_flagged.year.astype('|S4')==i)[0]] = np.random.binomial(1, holdout_prop)
         self.training_data = np.delete(data_flagged, np.where(data_flagged.holdout==1)[0], axis=0)
         self.test_data = np.delete(data_flagged, np.where(data_flagged.holdout==0)[0], axis=0)
         self.training_type = 'country-year'
         print('Fitting model to ' + str((1-holdout_prop)*100) + '% of country-years')
     elif holdout_unit == 'country':
         data_flagged = recfunctions.append_fields(self.observation_matrix, 'holdout', np.zeros(self.data_rows)).view(np.recarray)
         for i in self.country_list:
             data_flagged.holdout[np.where(data_flagged.country==i)[0]] = np.random.binomial(1, holdout_prop)
         self.training_data = np.delete(data_flagged, np.where(data_flagged.holdout==1)[0], axis=0)
         self.test_data = np.delete(data_flagged, np.where(data_flagged.holdout==0)[0], axis=0)
         self.training_type = 'country'
         print('Fitting model to ' + str((1-holdout_prop)*100) + '% of countries')
     else:
         raise ValueError("The holdout unit must be either 'datapoint', 'country-year', or 'country'.")
Example #13
def Tracks2Cells(tracks, falarms=None) :
    """
    Convert lists of tracks (and falarms) into a single recarray of storm cells
    with track IDs.

    This can be reversed with Cells2Tracks().
    """
    if falarms is None :
        falarms = []

    # NOTE: This function can not handle arrays of tracks that do/do not have
    #       a trackID field in a mix.  Either they all have it, or not.
    if not any('trackID' in aTrack.dtype.names for aTrack in tracks) :
        tracks = [nprf.append_fields(aTrack, 'trackID',
                                     [trackIndex] * len(aTrack),
                                     usemask=False)
                  for trackIndex, aTrack in enumerate(tracks)]

    if not any('trackID' in aTrack.dtype.names for aTrack in falarms) :
        falarms = [nprf.append_fields(aTrack, 'trackID',
                                      [-trackIndex - 1] * len(aTrack),
                                      usemask=False)
                   for trackIndex, aTrack in enumerate(falarms)]

    # If both are empty, then create an array without hstack()
    if len(tracks) != 0 or len(falarms) != 0 :
        allCells = np.hstack(tracks + falarms)
    else :
        allCells = np.array([], dtype=volume_dtype)

    return allCells
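A small usage sketch for Tracks2Cells with hypothetical toy tracks (the field names here are made up; `nprf` and `volume_dtype` come from the surrounding module): each cell ends up tagged with the index of the track it belongs to.

import numpy as np

track_dtype = [('xLoc', 'f8'), ('yLoc', 'f8')]
t0 = np.array([(0.0, 0.0), (1.0, 1.0)], dtype=track_dtype)  # track 0: two cells
t1 = np.array([(5.0, 5.0)], dtype=track_dtype)              # track 1: one cell
cells = Tracks2Cells([t0, t1])
print(cells['trackID'])  # [0 0 1]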
Example #14
    def get_raw_chamber_data(self,filtered_data):
        # chamber_dtype = numpy.dtype([('time_secs', '<u4'),
        #                              ('time_nsecs', '<u4'),
        #                              ('time_rel', '<f4'),
        #                              ('status', '|S25'),
        #                              ('tunnel', '<u2'),
        #                              ('fly_x', '<f4'),
        #                              ('fly_y', '<f4'),
        #                              ('fly_angle', '<f4'),
        #                              ])
        header = list(FILE_TOOLS.chamber_dtype.names)
        tracking_chamber_data = filtered_data[filtered_data['status'] != 'Walk To End']
        tracking_chamber_data = tracking_chamber_data[header]
        tracking_chamber_data = tracking_chamber_data.astype(FILE_TOOLS.chamber_dtype)
        tracking_chamber_data['tunnel'] = tracking_chamber_data['tunnel']+1
        indicies = tracking_chamber_data['status'] == 'End Chamber Ethanol'
        raw_chamber_data_ethanol = tracking_chamber_data[indicies]
        raw_chamber_data_ethanol = recfunctions.drop_fields(raw_chamber_data_ethanol,
                                                            'status',
                                                            usemask=False)
        status_array = numpy.array(['Ethanol']*len(raw_chamber_data_ethanol),dtype='|S25')
        raw_chamber_data_ethanol = recfunctions.append_fields(raw_chamber_data_ethanol,
                                                              'status',
                                                              status_array,
                                                              dtypes='|S25',
                                                              usemask=False)
        raw_chamber_data = raw_chamber_data_ethanol

        ethanol_start_time = raw_chamber_data_ethanol['time_rel'][0]
        indicies = tracking_chamber_data['status'] == 'End Chamber Air'
        indicies &= tracking_chamber_data['time_rel'] < ethanol_start_time
        raw_chamber_data_air_before = tracking_chamber_data[indicies]
        raw_chamber_data_air_before = recfunctions.drop_fields(raw_chamber_data_air_before,
                                                               'status',
                                                               usemask=False)
        status_array = numpy.array(['AirBefore']*len(raw_chamber_data_air_before),dtype='|S25')
        raw_chamber_data_air_before = recfunctions.append_fields(raw_chamber_data_air_before,
                                                                 'status',
                                                                 status_array,
                                                                 dtypes='|S25',
                                                                 usemask=False)
        raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data_air_before,raw_chamber_data),usemask=False)

        indicies = tracking_chamber_data['status'] == 'End Chamber Air'
        indicies &= tracking_chamber_data['time_rel'] > ethanol_start_time
        raw_chamber_data_air_after = tracking_chamber_data[indicies]
        raw_chamber_data_air_after = recfunctions.drop_fields(raw_chamber_data_air_after,
                                                               'status',
                                                               usemask=False)
        status_array = numpy.array(['AirAfter']*len(raw_chamber_data_air_after),dtype='|S25')
        raw_chamber_data_air_after = recfunctions.append_fields(raw_chamber_data_air_after,
                                                                 'status',
                                                                 status_array,
                                                                 dtypes='|S25',
                                                                 usemask=False)
        raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data,raw_chamber_data_air_after),usemask=False)

        return raw_chamber_data
Example #15
def add_constant(data, prepend=False):
    '''
    This appends a column of ones to an array if prepend==False.

    For ndarrays it checks to make sure a constant is not already included.
    If there is at least one column of ones then the original array is
    returned.  Does not check for a constant if a structured or recarray is
    given.

    Parameters
    ----------
    data : array-like
        `data` is the column-ordered design matrix
    prepend : bool
        True and the constant is prepended rather than appended.

    Returns
    -------
    data : array
        The original array with a constant (column of ones) as the first or
        last column.

    Notes
    -----

    .. WARNING::
       The default of prepend will be changed to True in the next release of
       statsmodels. We recommend to use an explicit prepend in any permanent
       code.

    '''
    import warnings
    warnings.warn("The default of `prepend` will be changed to True in the "
                  "next release, use explicit prepend", FutureWarning)
    if not data.dtype.names:
        data = np.asarray(data)
        if np.any(data[0]==1):
            ind = np.squeeze(np.where(data[0]==1))
            if ind.size == 1 and np.all(data[:,ind] == 1):
                return data
            elif ind.size > 1:
                for col in ind:
                    if np.all(data[:,col] == 1):
                        return data
        data = np.column_stack((data, np.ones((data.shape[0], 1))))
        if prepend:
            return np.roll(data, 1, 1)
    else:
        return_rec = data.__class__ is np.recarray
        if prepend:
            ones = np.ones((data.shape[0], 1), dtype=[('const', float)])
            data = nprf.append_fields(ones, data.dtype.names, [data[i] for
                i in data.dtype.names], usemask=False, asrecarray=return_rec)
        else:
            data = nprf.append_fields(data, 'const', np.ones(data.shape[0]),
                    usemask=False, asrecarray = return_rec)
    return data
Example #16
    def __init__(self, pathToFile=""):
        self.path = pathToFile
        self._points = np.genfromtxt(pathToFile, delimiter=' ', names='birth, death', dtype='f8, f8')
        self.lifespan_coords = []
        self.birth_coords = []

        # Generate commonly-used derived fields: lifespan, avg_coord
        self._points = rf.append_fields(self._points, 'lifespan', self._points['death'] - self._points['birth'], dtypes='f8')
        self._points = rf.append_fields(self._points, 'avg_coord', (self._points['death'] + self._points['birth'])/2, dtypes='f8')
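The same derived-field pattern in a self-contained form, with hypothetical in-memory points instead of a file (usemask=False is added here so plain ndarrays come back instead of masked arrays):

import numpy as np
from numpy.lib import recfunctions as rf

points = np.array([(0.1, 0.9), (0.3, 0.4)], dtype=[('birth', 'f8'), ('death', 'f8')])
points = rf.append_fields(points, 'lifespan', points['death'] - points['birth'], dtypes='f8', usemask=False)
points = rf.append_fields(points, 'avg_coord', (points['death'] + points['birth']) / 2, dtypes='f8', usemask=False)
print(points['lifespan'])   # [0.8 0.1]
print(points['avg_coord'])  # [0.5  0.35]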
Example #17
def count_good_trials():
    count_file = open(path_b_2_8+'/count_trials_blink.txt', 'w')      
    count_file.write('Subj C_inc C_tot  NC_inc NC_tot 1_inc 1_tot 2_inc 2_tot 3_inc 3_tot 4_inc 4_tot\r\n')
    for file in file_list:
        d_data = load_data_eye(path_blink, file)
        trial_info = extract_trials_info(d_data)
        mask_blink_outlier = np.in1d(paradigm['Trial'], trial_info['Trial'])
        trial_info = nprec.append_fields(trial_info, 
                                         'Condition', 
                                         paradigm['Condition'][mask_blink_outlier]).data
        task_trial = trial_info[trial_info['Condition'] != 'FIX']
        name = file.split('.')[0]
        try:
            behavioural = open_behavioural(path_b, name+'.xlsx')
        except IOError as err:
            print(err)
            continue
        m = mask_blink_outlier[1::2]
        trial_cond = nprec.append_fields(task_trial,
                                     ['Accuracy', 'Combination'], 
                                     [behavioural['Accuracy'][m], 
                                     behavioural['Combination'][m]]).data
        par = nprec.append_fields(paradigm[1::2],
                                     'Accuracy', 
                                     behavioural['Accuracy']).data
        '''
        trial_cond = trial_cond[trial_cond['Accuracy'] == 1]
        par = par[par['Accuracy'] == 1]
        behavioural = behavioural[behavioural['Accuracy'] == 1]
        '''
        count_file.write(file+' ')
        count_file.write(str(np.count_nonzero(trial_cond['Condition'] == 'C')))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(par['Condition'] == 'C')))
        count_file.write(' ')     
        count_file.write(str(np.count_nonzero(trial_cond['Condition'] == 'NC')))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(par['Condition'] == 'NC'))) 
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 1)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 1)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 2)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 2)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 3)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 3)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(trial_cond['Combination'] == 4)))
        count_file.write(' ')  
        count_file.write(str(np.count_nonzero(behavioural['Combination'] == 4)))
        count_file.write(' \r\n')
Example #18
def DESdperp_to_SDSSdperp(fullsdss, fulldes):
    
    modelmag_g = fullsdss['MODELMAG_G'] - fullsdss['EXTINCTION_G']
    modelmag_r = fullsdss['MODELMAG_R'] - fullsdss['EXTINCTION_R']
    modelmag_i = fullsdss['MODELMAG_I'] - fullsdss['EXTINCTION_I']
    modelmag_z = fullsdss['MODELMAG_Z'] - fullsdss['EXTINCTION_Z']
    dperp_sdss = (modelmag_r - modelmag_i) - (modelmag_g - modelmag_r)/8.0

    des, sdss = match(fulldes, fullsdss)
    
    modelmag_g_des = des['MODELMAG_G_DES'] - des['XCORR_SFD98_G']
    modelmag_r_des = des['MODELMAG_R_DES'] - des['XCORR_SFD98_R']
    modelmag_i_des = des['MODELMAG_I_DES'] - des['XCORR_SFD98_I']
    modelmag_z_des = des['MODELMAG_Z_DES'] - des['XCORR_SFD98_Z']
    dperp_des = (modelmag_r_des - modelmag_i_des) - (modelmag_g_des - modelmag_r_des)/8.0

    expcut = (des['IM3_GALPROF'] == 1)
    devcut = (des['IM3_GALPROF'] == 2)
    
    #magcut = ((des['MODELMAG_G_DES'] < 22.0) & (des['MODELMAG_R_DES'] < 22.0) &
    #(des['MODELMAG_I_DES'] < 22.0) & (des['MODELMAG_Z_DES'] < 22.0) )
    #magcut = ((sdss['MODELMAG_R'] < 22.0) & (sdss['MODELMAG_I'] < 22.0)
    #          &(sdss['MODELMAG_G'] < 22.0) & (sdss['MODELMAG_Z'] < 22.0))
    
    use =  (#(18.0 < des['CMODELMAG_I_DES']) &
            #(20.9 > des['CMODELMAG_I_DES']) &
            #(des['MODELMAG_R_DES'] - des['MODELMAG_I_DES'] < 2.) &
            (des['FIBER2MAG_I_DES'] < 21.5 )
            )
            
    #use = use  & magcut
            
    des_exp = des[expcut & use]
    des_dev = des[devcut & use]
    sdss_exp = sdss[expcut & use]
    sdss_dev = sdss[devcut & use]
    
    #des_exp_dperp = dperp_fitting(des_exp, sdss_exp)
    #des_dev_dperp = dperp_fitting(des_dev, sdss_dev)
    
    #SDSSlike_dperp = np.zeros(len(fulldes), dtype=np.float32)
    #SDSSlike_dperp[expcut & use] = des_exp_dperp
    #SDSSlike_dperp[devcut & use] = des_dev_dperp
    
    try :
        #fulldes = rf.append_fields(fulldes, 'DPERP_DES', SDSSlike_dperp)
        fulldes = rf.append_fields(fulldes, 'DPERP', dperp_des)
        fullsdss = rf.append_fields(fullsdss, 'DPERP', dperp_sdss)

    except ValueError:
        #fulldes['DPERP_DES'] = SDSSlike_dperp
        fulldes['DPERP'] = dperp_des
        fullsdss['DPERP'] = dperp_sdss

    return fullsdss, fulldes
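The try/except pattern above relies on append_fields raising ValueError when the field already exists. A minimal sketch with a hypothetical catalog:

import numpy as np
import numpy.lib.recfunctions as rf

cat = np.array([(1.0,), (2.0,)], dtype=[('MAG', 'f8')])
dperp = np.array([0.1, 0.2])
try:
    cat = rf.append_fields(cat, 'DPERP', dperp, usemask=False)
except ValueError:
    cat['DPERP'] = dperp  # field already present: assign in place instead
print(cat.dtype.names)  # ('MAG', 'DPERP')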
Example #19
def add_constant(data, prepend=True, has_constant='skip'):
    '''
    Adds a column of ones to an array. By default the constant is prepended.

    Parameters
    ----------
    data : array-like
        `data` is the column-ordered design matrix
    prepend : bool
        True and the constant is prepended rather than appended.
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if a constant is present. Using 'add' will duplicate the
        constant, if one is present. Has no effect for structured or
        recarrays. There is no checking for a constant in this case.

    Returns
    -------
    data : array
        The original array with a constant (column of ones) as the first or
        last column.
    '''
    if _is_using_pandas(data, None):
        # work on a copy
        return _pandas_add_constant(data.copy(), prepend, has_constant)
    else:
        data = np.asarray(data)
    if not data.dtype.names:
        var0 = data.var(0) == 0
        if np.any(var0):
            if has_constant == 'raise':
                raise ValueError("data already contains a constant.")
            elif has_constant == 'skip':
                return data
            elif has_constant == 'add':
                pass
            else:
                raise ValueError("Option {0} not understood for "
                                 "has_constant.".format(has_constant))
        data = np.column_stack((data, np.ones((data.shape[0], 1))))
        if prepend:
            return np.roll(data, 1, 1)
    else:
        return_rec = data.__class__ is np.recarray
        if prepend:
            ones = np.ones((data.shape[0], 1), dtype=[('const', float)])
            data = nprf.append_fields(ones, data.dtype.names,
                                      [data[i] for i in data.dtype.names],
                                      usemask=False, asrecarray=return_rec)
        else:
            data = nprf.append_fields(data, 'const', np.ones(data.shape[0]),
                                      usemask=False, asrecarray=return_rec)
    return data
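A short sketch of the structured-array branch with a hypothetical recarray, mirroring the append path above:

import numpy as np
from numpy.lib import recfunctions as nprf

rec = np.array([(1.0, 2.0), (3.0, 4.0)], dtype=[('x', 'f8'), ('y', 'f8')]).view(np.recarray)
out = nprf.append_fields(rec, 'const', np.ones(rec.shape[0]), usemask=False, asrecarray=True)
print(out.dtype.names)  # ('x', 'y', 'const')
print(out.const)        # [1. 1.]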
Example #20
    def readascii(self, infile, form='short'):

        if form=='short':
            dnames=('name', 'targ_ra', 'targ_dec', 'equinox', 'mag', 'band', 'priority')
            dformat=('S30', 'f4', 'f4', 'i4', 'f4', 'S1', 'f4')
            object_arr=np.loadtxt(infile, dtype={'names': dnames, 'formats': dformat},
                         converters={1:ra_read, 2:dec_read})
            #determine the missing values
            mnames=[]
            mtypes=[]
            for i in range(len(self.dnames)):
                if self.dnames[i] not in dnames:
                   mnames.append(self.dnames[i])
                   mtypes.append(self.dformat[i])
            #set up the default values
            default_list=[np.zeros(len(object_arr))]*len(mnames)
            default_list[0]=default_list[0]+self.default_width
            default_list[1]=default_list[1]+0.5*self.default_length
            default_list[2]=default_list[2]+0.5*self.default_length
            object_arr=rfn.append_fields(object_arr, names=mnames, data=default_list, dtypes=mtypes,
                     fill_value=0, usemask=False)
        elif form=='long':
            dnames=('name', 'targ_ra', 'targ_dec', 'equinox', 'mag', 'band', 'priority', 'width', 'length', 'tilt')
            dformat=('S30', 'f4', 'f4', 'i4', 'f4', 'S1', 'f4', 'f4', 'f4', 'f4')
            object_arr=np.loadtxt(infile, dtype={'names': dnames, 'formats': dformat},
                         converters={1:ra_read, 2:dec_read})
            #determine the missing values
            mnames=[]
            mtypes=[]
            for i in range(len(self.dnames)):
                if self.dnames[i] not in dnames:
                   mnames.append(self.dnames[i])
                   mtypes.append(self.dformat[i])
            #set up the default values
            default_list=[np.zeros(len(object_arr))]*len(mnames)
            object_arr=rfn.append_fields(object_arr, names=mnames, data=default_list, dtypes=mtypes,
                     fill_value=0, usemask=False)
        else:
            message='This format is not supported'
            raise SlitError(message)

        #set objects that are preselected
        object_arr['inmask_flag'] = 1.0*(object_arr['priority'] >= 1.0)
        #set reference stars
        object_arr['refstar_flag'] = 1.0*(object_arr['priority'] == -1.0)
     
        #stack the data if it already exists
        if self.data is None:  
           self.data=object_arr
        else:
           self.data=self.add_arrays(self.data, object_arr)
        # total number of objects:
        self.nobjects=len(self.data)
        self.update_flags()
Example #21
    def test_append_fields_dtype_list(self):
        # Ticket #1676
        from numpy.lib.recfunctions import append_fields

        base = np.array([1, 2, 3], dtype=np.int32)
        names = ['a', 'b', 'c']
        data = np.eye(3).astype(np.int32)
        dlist = [np.float64, np.int32, np.int32]
        try:
            append_fields(base, names, data, dlist)
        except Exception:
            raise AssertionError()
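The behavior this test guards, shown directly: with a dtype list, each appended field gets its own type (the unstructured base column becomes 'f0').

import numpy as np
from numpy.lib.recfunctions import append_fields

base = np.array([1, 2, 3], dtype=np.int32)
out = append_fields(base, ['a', 'b', 'c'], np.eye(3).astype(np.int32),
                    [np.float64, np.int32, np.int32])
print(out.dtype)  # [('f0', '<i4'), ('a', '<f8'), ('b', '<i4'), ('c', '<i4')]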
Example #22
    def analyze_data(self,raw_data):
        initialized = False
        tunnels = set(raw_data['tunnel'])
        for tunnel in tunnels:
            tunnel_data_raw = raw_data[raw_data['tunnel']==tunnel]
            time_rel = tunnel_data_raw['time_rel']
            delta_time = numpy.diff(time_rel)
            tunnel_array = numpy.ones(len(delta_time),dtype=numpy.uint16)*tunnel
            tunnel_array.dtype = numpy.dtype([('tunnel','<u2')])
            tunnel_data_analyzed = tunnel_array

            fly_x = tunnel_data_raw['fly_x']
            delta_fly_x = numpy.diff(fly_x)

            fly_y = tunnel_data_raw['fly_y']
            delta_fly_y = numpy.diff(fly_y)

            distance = numpy.sqrt(numpy.square(delta_fly_x)+numpy.square(delta_fly_y))

            velocity = distance/delta_time

            fly_angle = tunnel_data_raw['fly_angle']
            delta_fly_angle = numpy.abs(numpy.diff(fly_angle))
            flipped = 180 - delta_fly_angle
            flipped_is_less = flipped < delta_fly_angle
            delta_fly_angle[flipped_is_less] = flipped[flipped_is_less]

            angular_velocity = delta_fly_angle/delta_time

            time_secs = tunnel_data_raw['time_secs'][:-1]
            time_nsecs = tunnel_data_raw['time_nsecs'][:-1]

            names = ['time_secs','time_nsecs']
            tunnel_data_seq = [time_secs,time_nsecs]
            tunnel_data_analyzed = recfunctions.append_fields(tunnel_data_analyzed,
                                                              names,
                                                              tunnel_data_seq,
                                                              dtypes=numpy.uint64,
                                                              usemask=False)
            names = ['delta_time','delta_fly_x','delta_fly_y','distance','velocity','delta_fly_angle','angular_velocity']
            tunnel_data_seq = [delta_time,delta_fly_x,delta_fly_y,distance,velocity,delta_fly_angle,angular_velocity]
            tunnel_data_analyzed = recfunctions.append_fields(tunnel_data_analyzed,
                                                              names,
                                                              tunnel_data_seq,
                                                              dtypes=numpy.float32,
                                                              usemask=False)
            if initialized:
                analyzed_data = recfunctions.stack_arrays((analyzed_data,tunnel_data_analyzed),usemask=False)
            else:
                analyzed_data = tunnel_data_analyzed
                initialized = True
        return analyzed_data
Example #23
def vol_international(data,dbh,log_length):
	if log_length == 8:
		vol_8log = ((0.44*(data[dbh]))**2)-(1.20*(data[dbh])-(1.30))
		return recfunctions.append_fields(data,"8ft_log_vol",vol_8log)
	elif log_length == 12:
		vol_12log = ((0.66*(data[dbh]))**2)-(1.47*(data[dbh])-(0.79))
		return recfunctions.append_fields(data,"12ft_log_vol",vol_12log)
	elif log_length == 16:
		vol_16log = ((0.88*(data[dbh]))**2)-(1.52*(data[dbh])-(1.36))
		return recfunctions.append_fields(data,"16ft_log_vol",vol_16log)
	else:
		print "No valid log lenth entered!"
		sys.exit(1)
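A quick usage sketch with a hypothetical stand table (assumes the function above is in scope):

import numpy as np

stand = np.array([(10.0,), (14.0,)], dtype=[('dbh', 'f8')])
out = vol_international(stand, 'dbh', 16)
print(out.dtype.names)      # ('dbh', '16ft_log_vol')
print(out['16ft_log_vol'])  # 16-ft-log volumes computed from the rule above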
Example #24
 def add_field(array,field_name,field_type,replace=False):
     """
     Note: I think we're passed a copy of the array, so you have
     to take the result and assign it to your variable
     example: data = add_field(data,'new_field',float32)
     Possibilities for field_type include: float32, _s
     Delete this and only use the method below if it
     proves to be good enough
     """
     if not data_mine.has_fields(array,[field_name]):
         return recfunctions.append_fields(array,field_name,zeros(len(array)),field_type,usemask=False)
     elif(replace):
         return recfunctions.append_fields(array,field_name,zeros(len(array)),field_type,usemask=False)
Example #25
def compare_columns(data, name_1, name_2, name_combo, mask, function):
    '''
    Compare two columns and, if there is data in both of them,
    complete a particular function:
        Functions can be 'average', 'diff' or 'max'
    The mask is a column of 0, 1, 2 and 3 which tells you whether to
    use neither data point (999), only data[name_1], only data[name_2],
    or carry out the function on both data points.

    Returns: data
    '''

    # Define your data
    data_1 = data[ name_1 ]
    data_2 = data[ name_2 ]
    data_combo_name = name_combo
    
    # Fill in the data initially with 999s
    data_combo = np.ones_like(data_1) * 999.
    
    # If there is only one data point fill that in
    data_combo[mask==1] = data_1[mask==1]
    data_combo[mask==2] = data_2[mask==2]

    # If there are two data points calculate:
    if function == 'average':
        data_combo[mask==3] = ( data_1[mask==3] + data_2[mask==3] ) / 2
    elif function == 'diff':
        data_combo[mask==3] = ( data_2[mask==3] - data_1[mask==3] )
    elif function == 'max':
        data_combo[mask==3] = np.maximum( data_2[mask==3], data_1[mask==3] )

    # Append this data to our recarray
    data = rec.append_fields(base=data,
                        names=name_combo,
                        data=data_combo,
                        dtypes=None,
                        usemask=False,
                        asrecarray=True )
                        
    # And also include a copy of the mask
    data = rec.append_fields(base=data,
                        names=name_combo + '_mask',
                        data=mask,
                        dtypes=None,
                        usemask=False,
                        asrecarray=True )
    
    
    return data
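A usage sketch with hypothetical data: row 0 has both readings (mask 3, so they are averaged) and row 1 only the first (mask 1).

import numpy as np

data = np.array([(1.0, 2.0), (3.0, 5.0)], dtype=[('a', 'f8'), ('b', 'f8')])
mask = np.array([3, 1])
out = compare_columns(data, 'a', 'b', 'ab_avg', mask, 'average')
print(out['ab_avg'])       # [1.5 3. ]
print(out['ab_avg_mask'])  # [3 1]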
Example #26
def load_data(fname='Brain24ft_v0307-Jun-2014 091700_labels.csv'):

	# NOTE "nodes" is used to refer to both nodes and bars! TODO fix this naming convention

	X = np.genfromtxt(fname, delimiter=',', names=True, dtype=[('step', '|S50'), ('nodes', '|S50'), ('angles', '|S50'), ('length', '|S50')])
	X = recfunctions.append_fields(X, 'step int', np.zeros((X.shape[0],), dtype=int))
	X = recfunctions.append_fields(X, 'node 1 inout', np.zeros((X.shape[0],), dtype='|S3'))
	X = recfunctions.append_fields(X, 'node 2 inout', np.zeros((X.shape[0],), dtype='|S3'))
	X = recfunctions.append_fields(X, 'node 1 TLA', np.zeros((X.shape[0],), dtype='|S3'))
	X = recfunctions.append_fields(X, 'node 2 TLA', np.zeros((X.shape[0],), dtype='|S3'))
	X = recfunctions.append_fields(X, 'node 1 order', np.zeros((X.shape[0],), dtype=int))
	X = recfunctions.append_fields(X, 'node 2 order', np.zeros((X.shape[0],), dtype=int))
	X = recfunctions.append_fields(X, 'bar length', np.zeros((X.shape[0],), dtype=float))
	for ii in range(X.shape[0]):
		X['step int'][ii] = extract_step_number(X['step'][ii])
		X['node 1 TLA'][ii], X['node 1 order'][ii], X['node 1 inout'][ii], \
		X['node 2 TLA'][ii], X['node 2 order'][ii], X['node 2 inout'][ii], \
		X['bar length'][ii] = \
		extract_nodes_length(X['nodes'][ii] + ', ' + X['length'][ii])

	max_order = defaultdict(int)
	for ii in range(X.shape[0]):
		tla = X['node 1 TLA'][ii]
		order = X['node 1 order'][ii]
		if order > max_order[tla]:
			max_order[tla] = order
		tla = X['node 2 TLA'][ii]
		order = X['node 2 order'][ii]
		if order > max_order[tla]:
			max_order[tla] = order

	return X, max_order
Example #27
def build_utc_array(source, sink, start, end):

    source_prices = retrieve_node_data(source, start, end)
    sink_prices = retrieve_node_data(sink, start, end)

    source_data = []

    for element in source_prices:
        source_data.append((element[0].replace(tzinfo=pytz.timezone('EST')),
                            element[1],
                            element[2],
                            element[5]))

    sink_data = []

    for element in sink_prices:
        sink_data.append((element[0].replace(tzinfo=pytz.timezone('EST')),
                          element[1],
                          element[2],
                          element[5]))

    sink_dt = numpy.dtype([('time_id', 'S32'),
                      ('sink_node_id', 'i8'),
                      ('sink_rt_lmp', 'f8'),
                      ('sink_da_lmp', 'f8')])

    source_dt = numpy.dtype([('time_id', 'S32'),
                      ('source_node_id', 'i8'),
                      ('source_rt_lmp', 'f8'),
                      ('source_da_lmp', 'f8')])


    sink_array = numpy.array(sink_data, dtype=sink_dt)
    source_array = numpy.array(source_data, dtype=source_dt)

    joined = rfn.join_by('time_id', sink_array,
                                    source_array,
                                    jointype='inner', usemask=False)

    rt_congestion_rounded = numpy.round(joined['sink_rt_lmp'] - joined['source_rt_lmp'], 2)
    da_congestion_rounded = numpy.round(joined['sink_da_lmp'] - joined['source_da_lmp'], 2)
    profit_rounded = numpy.round(rt_congestion_rounded - da_congestion_rounded, 2)

    joined = rfn.append_fields(joined, 'rt_congestion', data=rt_congestion_rounded)
    joined = rfn.append_fields(joined, 'da_congestion', data=da_congestion_rounded)
    joined = rfn.append_fields(joined, 'profit', data=profit_rounded)

    return joined[['time_id', 'rt_congestion']]
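The core join-then-derive step as a self-contained sketch, with hypothetical one-row node data (usemask=False added so plain structured arrays come back):

import numpy
from numpy.lib import recfunctions as rfn

sink = numpy.array([(b't1', 1, 30.0, 28.0)],
                   dtype=[('time_id', 'S32'), ('sink_node_id', 'i8'),
                          ('sink_rt_lmp', 'f8'), ('sink_da_lmp', 'f8')])
source = numpy.array([(b't1', 2, 25.0, 26.0)],
                     dtype=[('time_id', 'S32'), ('source_node_id', 'i8'),
                            ('source_rt_lmp', 'f8'), ('source_da_lmp', 'f8')])
joined = rfn.join_by('time_id', sink, source, jointype='inner', usemask=False)
rt = numpy.round(joined['sink_rt_lmp'] - joined['source_rt_lmp'], 2)
joined = rfn.append_fields(joined, 'rt_congestion', data=rt, usemask=False)
print(joined[['time_id', 'rt_congestion']])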
Example #28
 def readPhotInfo(self,level=.5):
 
     '''
     Read the photometry band information associated with photometry of this
     SED.
     
     @keyword level: The level at which the cut off for significant 
                     transmission of the photometric bands is placed.
                     
                     (default: 0.5)        
     @type level: float
     
     '''
     
     #-- Get photometry bands info from IvS repo. recarray structure same as 
     #   self.photbands_ivs
     filter_info = filters.get_info()
     keep = np.searchsorted(filter_info['photband'],self.photbands)
     self.filter_info = filter_info[keep]
     self.filter_info.eff_wave = self.filter_info.eff_wave/1e4
     
     response = [filters.get_response(photband) 
                 for photband in self.photbands]
     selection = [waver[transr/max(transr)>level]/1e4
                  for waver,transr in response]
     wlower = [sel[0] for sel in selection]
     wupper = [sel[-1] for sel in selection]
     self.filter_info = recfunc.append_fields(self.filter_info,
                                              ['wlower','wupper'],
                                              [wlower,wupper],
                                              usemask=False, asrecarray=True)
Example #29
def merge_cort(data, cortisol_filename):
    
    cort_data = np.genfromtxt(cortisol_filename, dtype=None, names=True, delimiter='\t')
    
    names = list(cort_data.dtype.names)
    
    # Find all the columns in cort_data that have 'av' in their title
    # and not '_mask'
    drop_names = names[8:]

    cort_data = nprf.drop_fields(cort_data, drop_names, usemask=False, asrecarray=True)
    
    data = nprf.join_by('SubID', data, cort_data, jointype='leftouter',
                            r1postfix='KW', r2postfix='KW2', usemask=False,asrecarray=True)
    
    # Bizarrely, the join_by function pads with the biggest numbers it can think of!
    # So we're going to replace everything over 999 with 999
    for name in names[1:8]:
        data[name][data[name]>999] = 999
    
    # Define a UsableCort field: 1 if ANY of the cortisol values are not 999
    cort_array = np.vstack([data[name] for name in names[1:8]])
    usable_cort_array = np.zeros(cort_array.shape[1])
    usable_cort_array[np.any(cort_array != 999, axis=0)] = 1
    
    data = nprf.append_fields(base = data, names='UsableCort', data = usable_cort_array, usemask=False)

    return data
Example #30
    def filter_effects(self):
        """
        Merge effects and data, and flip effect alleles 
        """
        effect_positions=self.effects[["CHR", "POS"]]
        data_positions=self.data.snp[["CHR", "POS"]]

        effect_include=np.in1d(effect_positions, data_positions)
        data_include=np.in1d(data_positions, effect_positions)

        self.data.filter_snps(data_include)
        self.effects=self.effects[effect_include]
        # Just give up and convert to float. I have no idea why int doesn't work here
        # but it's something to do with the fact that you can't have None as a numpy int
        # whereas float gets converted to nan.
        tmp_data=nprec.append_fields(self.data.snp, "GENO", None, dtypes=[(float,self.data.geno.shape[1])],usemask=False)
        tmp_data["GENO"]=self.data.geno
        self.effects=nprec.join_by(["CHR", "POS"], self.effects, tmp_data, usemask=False, jointype="inner")
        flipped=0
        removed=0
        for rec in self.effects:
            if rec["EFFECT"]==rec["REF"] and rec["OTHER"]==rec["ALT"]:
                pass
            elif rec["OTHER"]==rec["REF"] and rec["EFFECT"]==rec["ALT"]:
                flipped+=1
                rec["OTHER"]=rec["ALT"]
                rec["EFFECT"]=rec["REF"]
                rec["BETA"]=-rec["BETA"]
            else:
                removed+=1
                rec["EFFECT"]=rec["OTHER"]="N"

        self.effects=self.effects[self.effects["EFFECT"]!="N"]
        print( "Removed "+str(removed)+" non-matching alleles",file=sys.stderr)
        print( "Flipped "+str(flipped)+" alleles",file=sys.stderr)
Example #31
def knearest(distarr, k):
    dists = distarr.shape[1]
    pointdists = np.zeros((dists, 2))
    scores = np.array(distarr[:, dists-1])
    nearest = []
    #for each test point
    for i in range(dists-1):
        pointdists = distarr[:, i]
        pointdists = append_fields(pointdists, 'scores', scores, usemask=False)
        # get distances sorted smallest to largest
        sorted_dists = np.sort(pointdists)
        # get k smallest classifiers (+1 or -1) from the sorted list
        nearest.append(sorted_dists[:k]['scores'])
    #list of each test point's k nearest neighbors as their classifiers
    return nearest
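A note on the call above, as a tiny sketch: append_fields also accepts a plain (unstructured) 1-D array, whose values land under the default field name 'f0', so np.sort orders records by distance first.

import numpy as np
from numpy.lib.recfunctions import append_fields

dists = np.array([0.5, 0.1, 0.9])
scores = np.array([1, -1, 1])
pts = append_fields(dists, 'scores', scores, usemask=False)
print(pts.dtype.names)             # ('f0', 'scores')
print(np.sort(pts)[:2]['scores'])  # scores of the two nearest points: [-1  1]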
Example #32
def get_demodulated_data_from_list(filelist,
                                   freq=10,
                                   supply_index=False,
                                   phase_offset=0):
    filelist.sort()  #just in case

    dd = []
    for f in filelist:
        #only use full size files
        stats = os.stat(f)
        #print(stats.st_size)
        if stats.st_size > 5000000:  #full length is 10752000:
            d = demod.demodulate_dat(f,
                                     freq,
                                     supply_index=False,
                                     phase_offset=phase_offset)
            #filename is start of data taking (I think) and we'll just add 1/samprate seconds per rev
            h = np.float64(f[-12:-10])
            m = np.float64(f[-10:-8])
            s = np.float64(f[-8:-6])
            t = h + m / 60. + (s + (d['rev'] - d['rev'][0]) / samprate) / 3600.
            d = recf.append_fields(d, 'localtime', t)
            ut = np.mod(t + 7., 24.)
            if len(f) > 21:
                y = np.zeros(len(d), dtype=int) + int(f[-21:-17])
                mo = np.zeros(len(d), dtype=int) + int(f[-17:-15])
                dy = np.zeros(len(d), dtype=int) + int(f[-15:-13])
                ut = np.mod(t + 7., 24.)
                utt = t + 7.
                dy[utt > ut] = dy[utt > ut] + 1
                d = recf.append_fields(d, ['year', 'month', 'day'],
                                       [y, mo, dy])
            d = recf.append_fields(d, 'ut', ut)
            dd.append(d)

    return np.concatenate(dd)
Example #33
    def __setattr__(self, name, value):
        listofattributes = self.__dict__.keys()

        if isinstance(value, numpy.ndarray) and name != "_data" and name not in listofattributes:
            if value.shape != self.data.shape:
                raise ValueError('Arrays should have the same dimensions')
            else:
                from numpy.lib import recfunctions
                detail = getattr(value, 'provenance', None)
                data = recfunctions.append_fields(self.data, name, value, usemask=False)
                self._data = data
                self._provenance += Transformation("Array {0} has been created".format(name), detail)
                self.__add_var(name)
        else:
            dict.__setattr__(self, name, value)
Example #34
def freq(a, cls_flds=None, stat_fld=None):
    """Frequency and crosstabulation

    Parameters
    ----------
    a : array
        A structured array.
    cls_flds : list of str
        Classification fields to use in the analysis.
    stat_fld : str, optional
        Numeric field to summarize within each class.

    Notes
    -----
    1. Slice the input array by the classification fields.
    2. Sort the sliced array using the flds as sorting keys.
    3. Use unique on the sorted array to return the results and the counts.

    >>> np.unique(ar, return_index=False, return_inverse=False,
    ...           return_counts=True, axis=None)
    """
    if stat_fld is None:
        a = a[cls_flds]  # (1) It is actually faster to slice the whole table
    else:
        all_flds = cls_flds + [stat_fld]
        a = a[all_flds]
    idx = np.argsort(a, axis=0, order=cls_flds)  # (2)
    a_sort = a[idx]
    uni, inv, cnts = np.unique(a_sort[cls_flds],
                               False,
                               True,
                               return_counts=True)  # (3)
    out_flds = "Counts"
    out_data = cnts
    if stat_fld is not None:
        splitter = np.where(np.diff(inv) == 1)[0] + 1
        a0 = a_sort[stat_fld]
        splits = np.split(a0, splitter)
        sums = np.asarray([np.nansum(i.tolist()) for i in splits])
        nans = np.asarray([np.sum(np.isnan(i.tolist())) for i in splits])
        mins = np.asarray([np.nanmin(i.tolist()) for i in splits])
        means = np.asarray([np.nanmean(i.tolist()) for i in splits])
        maxs = np.asarray([np.nanmax(i.tolist()) for i in splits])
        out_flds = [
            out_flds, stat_fld + "_sums", stat_fld + "_NaN", stat_fld + "_min",
            stat_fld + "_mean", stat_fld + "_max"
        ]
        out_data = [out_data, sums, nans, mins, means, maxs]
    out = append_fields(uni, names=out_flds, data=out_data, usemask=False)
    return out
Example #35
def genrand(data, n, cosmo, width=.2, plot=True, plot_filename=None):
    '''
    Generates a random catalog with random sky distribution and redshift.
    To filter based on the BASS sensitivity map, set 'use_BASS_sens_map' to True.
    '''
    goodz = data['z'] > 0
    if 'weight' in data.dtype.names:
        weights = data['weight'][goodz]
    else:
        weights = None
    d = data[goodz]
    z_arr = d['z']
    ra_arr = d['ra']
    dec_arr = d['dec']
    ur, uind = np.unique(d['ra'], return_index=True)
    udata = d[uind]
    ndata = len(udata)

    #generate random redshifts
    n_rand = int(round(n * ndata))
    z_grid = np.linspace(min(z_arr), max(z_arr), 1000)
    kde = weighted_gaussian_kde(z_arr, bw_method=width, weights=weights)
    kdepdfz = kde.evaluate(z_grid)
    zr_arr = generate_rand_from_pdf(pdf=kdepdfz, num=n_rand, x_grid=z_grid)

    #generate sky coords
    ind = np.random.randint(ndata, size=n_rand)
    rar_arr = udata['ra'][ind]
    decr_arr = udata['dec'][ind]

    temp = list(zip(zr_arr, rar_arr, decr_arr))
    rcat = np.zeros((len(zr_arr), ),
                    dtype=[('z', '<f8'), ('ra', '<f8'), ('dec', '<f8')])
    rcat[:] = temp

    randoms = rcat
    rcdists = np.array(
        [cosmo.comoving_distance(z).value for z in randoms['z']]) * cosmo.h
    randoms = append_fields(randoms, 'cdist', rcdists)
    random = np.array(randoms)

    print('number of randoms:', len(randoms))

    if plot:
        plot_zdist(d, randoms, z_grid, kdepdfz, plot_filename, weights=weights)

    return randoms
Example #36
    def add_standard_properties(self):
        """ Augments the dataset with a series of standard properties that we use in assembly bias
		calculations.
		"""
        # 5 definitions of halo concentration.
        cNFW200b = self.data['halo_R200b'] / self.data['halo_rs']
        cV200b = self.calculate_cV('200b')
        cVsp_mean = self.calculate_cV('sp_mean')
        cVsp_percentile75 = self.calculate_cV('sp_percentile75')
        cVsp_percentile87 = self.calculate_cV('sp_percentile87')

        # Plus several halo size ratios.
        sizeratiosp87_200b = self.data['halo_Rsp_percentile87'] / self.data[
            'halo_R200b']
        sizeratiosp75_200b = self.data['halo_Rsp_percentile75'] / self.data[
            'halo_R200b']
        sizeratiospmean_200b = self.data['halo_Rsp_mean'] / self.data[
            'halo_R200b']
        sizeratiosp87_spmean = self.data['halo_Rsp_percentile87'] / self.data[
            'halo_Rsp_mean']

        # And the same mass ratios
        massratiosp87_200b = self.data['halo_Msp_percentile87'] / self.data[
            'halo_M200b']
        massratiosp75_200b = self.data['halo_Msp_percentile75'] / self.data[
            'halo_M200b']
        massratiospmean_200b = self.data['halo_Msp_mean'] / self.data[
            'halo_M200b']
        massratiosp87_spmean = self.data['halo_Msp_percentile87'] / self.data[
            'halo_Msp_mean']

        # And randoms for calculation ease
        uniformrands = np.random.uniform(0, 1, len(self.data))

        # Add these all into the data.
        self.data = append_fields(
            self.data, ('halo_cNFW200b', 'halo_cV200b', 'halo_cVsp_mean',
                        'halo_cVsp_percentile75', 'halo_cVsp_percentile87',
                        'halo_sizeratiosp87_200b', 'halo_sizeratiosp75_200b',
                        'halo_sizeratiospmean_200b',
                        'halo_sizeratiosp87_spmean', 'halo_massratiosp87_200b',
                        'halo_massratiosp75_200b', 'halo_massratiospmean_200b',
                        'halo_massratiosp87_spmean', 'err_rands'),
            (cNFW200b, cV200b, cVsp_mean, cVsp_percentile75, cVsp_percentile87,
             sizeratiosp87_200b, sizeratiosp75_200b, sizeratiospmean_200b,
             sizeratiosp87_spmean, massratiosp87_200b, massratiosp75_200b,
             massratiospmean_200b, massratiosp87_spmean, uniformrands),
            usemask=False)
Example #37
    def load_array(self, d, file):

        import time
        t0 = time.time()

        if self.params['has_sheared'] & (file == 'shapefile'):
            d['flags_1p'] = 'flags_select_1p'
            d['flags_1m'] = 'flags_select_1m'
            d['flags_2p'] = 'flags_select_2p'
            d['flags_2m'] = 'flags_select_2m'

        if self.params['pdf_type'] == 'pdf':
            keys = [
                key for key in d.keys()
                if (d[key] is not None) & (key != 'pzstack')
            ]
        else:
            keys = [key for key in d.keys() if (d[key] is not None)]

        if 'objid' in keys:
            dtypes = [('objid', 'i8')]
        else:
            raise ValueError('missing object id in ' + file)
        dtypes += [(key, 'f8') for key in keys if (key != 'objid')]
        if self.params['pdf_type'] == 'pdf':
            dtypes += [('pzstack_' + str(i), 'f8')
                       for i in range(len(self.params['pdf_z']))]

        fits = fio.FITS(self.params[file])[-1]
        array = fits.read(columns=[d[key] for key in keys])

        array = rename_fields(array, {v: k for k, v in d.items()})

        if ('weight' not in array.dtype.names) & (file == 'shapefile'):
            array = append_fields(array,
                                  'weight',
                                  np.ones(len(array)),
                                  usemask=False)

        if self.params['pdf_type'] == 'pdf':
            for i in range(len(self.params['pdf_z'])):
                array['pzstack' + str(i)] = fits.read(columns=d['pzstack'] +
                                                      str(i))

        if np.any(np.diff(array['objid']) < 1):
            raise ValueError('misordered or duplicate ids in ' + file)

        return array
Example #38
def cross_validate(args):
    assert len(args['bw_key']) == len(args['bw'])
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    args['phi0'] *= 1e-18  # correct units
    kf = KFold(n_splits=args['kfold'], random_state=args['rs'], shuffle=True)
    config = read_config()
    print('Load MC: {}'.format(config['IC_MC']['path']))
    mc = np.load(str(config['IC_MC']['path']))[:]
    mc = mc_cut(mc)
    if args['weights'] == 'pl':
        weights = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                       gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc['conv']
    elif args['weights'] == 'conv+pl':
        diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                           gamma=args['gamma'])
        weights = mc['conv'] + diff_weight
        print('Rates [1/yr]:')
        print(np.sum(mc['conv']) * np.pi * 1e7)   # pi * 1e7 s is roughly one year
        print(np.sum(diff_weight) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(1)
    mc = append_fields(mc, 'cur_weight', weights, usemask=False)
    args['weights'] = 'default'
    model, mname = load_model(args['model'])
    bw_dict = dict()
    for i, key in enumerate(args['bw_key']):
        bw_dict[key] = args['bw'][i]
    lh_arr, zero_arr = [], []
    for train_index, val_index in kf.split(mc):
        args['no_save'] = True
        res_dict = create_KDE(args, mc=mc[train_index], bws=bw_dict)
        mc_val = mc[val_index]
        val_settings, grid = model.setup_KDE(mc_val)
        lh, zeros = do_validation(res_dict, val_settings, mc_val['cur_weight'])
        print('Number of zeros {}'.format(zeros))
        print('Likelihood Value {}'.format(lh))
        zero_arr.append(zeros)
        lh_arr.append(lh)
    fname = ''
    for i in range(len(args['bw'])):
        fname += '{}_{}_'.format(args['bw_key'][i], args['bw'][i])
    fname = fname[:-1] + '.npy'
    odict = {'zeros': zero_arr, 'lh': lh_arr}
    np.save(os.path.join(args['outfolder'], fname), odict)
Example #39
def AddColumns(arr, columnName):
    types = np.unique(arr[columnName])

    for t in types:
        arr = rfn.append_fields(arr,
                                names=columnName + "_" + t,
                                data=-np.ones(len(arr)),
                                usemask=False)

    for r in arr:
        for t in types:
            if r[columnName] == t:
                r[columnName + '_' + t] = 1
                break

    return rfn.drop_fields(arr, drop_names=columnName)
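
A small sketch of the one-hot expansion `AddColumns` performs, on a
hypothetical single-column table:

import numpy as np
from numpy.lib import recfunctions as rfn

arr = np.array([('a',), ('b',), ('a',)], dtype=[('color', 'U1')])
out = AddColumns(arr, 'color')
print(out.dtype.names)  # ('color_a', 'color_b')
print(out['color_a'])   # [ 1. -1.  1.]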
Example #40
def add_groups(g, group_dict):
    """ Add group info to a sGraph object, if already presents it raises a warning.
    """
    if hasattr(g, 'gv'):
        msg = 'Group info already present, will overwrite.'
        warnings.warn(msg, UserWarning)

    g.gv = graphs.GroupVertexList()
    g.group_dict = group_dict
    g.num_groups = len(group_dict)
    num_bytes = mt.get_num_bytes(g.num_groups)
    g.group_dtype = np.dtype('u' + str(num_bytes))
    if 'group' in g.v.dtype.names:
        g.v.group = group_dict
    else:
        g.v = append_fields(g.v, 'group', group_dict)
Example #41
    def __getattr__(self, attrname):

        # See __init__ for column names
        try:
            return self.data[attrname]
        except (KeyError, ValueError):
            pass

        # If we got here, stations column wasn't in file.
        #   Try getting it from station mask.
        if attrname == 'stations':
            stations = self.hexMaskToStationCount()
            self.data = append_fields(self.data, ('stations', ), (stations, ),
                                      usemask=False)
            return stations

        return None
Example #42
    def compute_xt(self):
        """
        Compute dimensionless curvilinear distance `xt` (from 0 to 1)
        /!\ Column `Xt` has to exist
        """
        if len(self.array) > 1:
            xt = (self.array['Xt'] - self.array['Xt'][0])/(self.array['Xt'][-1] - self.array['Xt'][0])
        else:
            xt = np.empty(len(self.array))
            xt.fill(-999.)

        # Update or append `xt` array column
        if 'xt' in self.array.dtype.fields:
            self.array['xt'] = xt
        else:
            self.array = append_fields(self.array, 'xt', xt, usemask=False)
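
The normalization maps the running curvilinear abscissa onto [0, 1]; a
standalone check with hypothetical values:

import numpy as np

Xt = np.array([100.0, 150.0, 300.0])   # curvilinear distances
xt = (Xt - Xt[0]) / (Xt[-1] - Xt[0])   # -> [0.0, 0.25, 1.0]
print(xt)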
Example #43
    def __setitem__(self, key, value):
        value = np.asanyarray(value)

        # Filtered data handling (autofilled with 0)
        if len(value) == self.size:
            full_value = np.zeros(len(self._rdata), dtype=value.dtype)
            full_value[~self._rdata['mask']] = value
            value = full_value
        elif len(value) != len(self._rdata):
            raise ValueError('wrong dimension')

        if key in self.names: #Update
            self._rdata[key] = value
        else: #Add
            self._rdata = recfunctions.append_fields(self._rdata, names=key,
                                                     data=value, usemask=False)
Example #44
    def strength(self, get=False):
        """ Compute the undirected strength sequence.

        If get is true it returns the array otherwise it adds the result to v.
        """
        if 'strength' in self.v.dtype.names:
            strength = self.v.strength
        else:
            strength = mt.compute_strength(self.e, self.num_vertices)
            self.v = append_fields(self.v,
                                   'strength',
                                   strength,
                                   dtypes=np.float64)

        if get:
            return strength
Example #45
    def in_strength(self, get=False):
        """ Compute the out strength sequence.

        If get is true it returns the array otherwise it adds the result to v.
        """
        if 'in_strength' in self.v.dtype.names:
            s_in = self.v.in_strength
        else:
            s_out, s_in = mt.compute_in_out_strength(self.e, self.num_vertices)

            self.v = append_fields(self.v, ['out_strength', 'in_strength'],
                                   (s_out, s_in),
                                   dtypes=[np.float64, np.float64])

        if get:
            return s_in
Example #46
    def getChromosomeBySample(self, sampleID, chromosome, getexons=True):
        d = rpkm_data()
        data_tbl = self.h5file.root._f_getChild("chr" + str(chromosome))
        sample_tbl = data_tbl._f_getChild("sample_" + sampleID)
        d.rpkm = sample_tbl.read(field="rpkm")
        if getexons:
            probe_tbl = self.h5file.root.probes._f_getChild("probes_chr" +
                                                            str(chromosome))
            d.exons = probe_tbl.read()
            d.exons = rfn.append_fields(d.exons,
                                        "chrom",
                                        np.repeat(chromosome, len(d.exons)),
                                        usemask=False)
            d.contig = chromosome

        return d
Example #47
def process_data(statData):
    # calculate heritability time
    tauHer = mlsg.calc_tauHer_numeric(statData['n0'], statData['mig'])
    tauVar = mlsg.calc_tauV(statData['cost'])
    tauHer_rel = tauHer / statData['TAU_H']
    tauVar_rel = tauVar / statData['TAU_H']
    BH_cat = mlsg.make_categorial(statData['B_H'])
    dataToStore = (tauHer, tauVar, tauHer_rel, tauVar_rel, BH_cat)
    nameToStore = ('tauHer', 'tauVar', 'tauHer_rel', 'tauVar_rel', 'BH_cat')

    statData = rf.append_fields(statData,
                                nameToStore,
                                dataToStore,
                                usemask=False)

    return statData
Example #48
def load_ill_data(filenumbers, prefix, monitor='M1'):
    """
    Loads one or several ILL data files and returns a single structured array
    """
    if isinstance(filenumbers, int):
        d = load_ill_ascii(prefix + str(filenumbers))
    else:
        d = load_ill_ascii(prefix + str(filenumbers[0]))
        for f in filenumbers[1:]:
            d = np.append(d, load_ill_ascii(prefix + str(f)))

    # Normalize counts by the requested monitor and propagate Poisson errors.
    I = d['CNTS'] / d[monitor]
    err = np.sqrt(d['CNTS']) / d[monitor]

    d = append_fields(d, ['I', 'err'], [I, err], usemask=False)
    return d
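
The error column is plain Poisson error propagation: for N detected counts and
monitor M, sigma_I = sqrt(N) / M. A quick check with hypothetical numbers:

import numpy as np

counts, monitor = 400.0, 1e5
I = counts / monitor             # 0.004
err = np.sqrt(counts) / monitor  # 0.0002, i.e. a 5% relative error
print(I, err)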
Example #49
def make_SHAM_mock(mock,
                   P_xy,
                   mock_prop='mvir',
                   gal_prop='mstar',
                   use_log_mock_prop=True):
    """
    make a SHAM mock given a halo catalogue.  
    
    Parameters
    ==========
    mock: array_like
        structured array containing halo catalogue
    
    P_xy: function
        probability function that returns probability of x_gal given y_halo
    
    mock_prop: string
        key into mock which returns the halo property to build the SHAM mock
    
    gal_prop: string
        name of the new column holding the assigned galaxy property
    
    use_log_mock_prop: bool
        if True, pass log10 of the halo property to P_xy
    
    Returns
    =======
    mock: structured array
        mock with new column containing galaxy property gal_prop
        
    Notes
    =====
    P_xy encodes the probability of galaxy property 'x' given a halo with
    property 'y', where mock[mock_prop] returns halo property 'y'.
    """

    from numpy.lib.recfunctions import append_fields

    mock = mock.view(np.recarray)

    if use_log_mock_prop:
        y = np.log10(mock[mock_prop])
    else:
        y = mock[mock_prop]

    x = P_xy(y).rvs(len(mock))

    if gal_prop in mock.dtype.names:
        mock[gal_prop] = x
    else:
        mock = append_fields(mock, gal_prop, x, usemask=False)

    return mock
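
A hypothetical P_xy built from scipy.stats: a Gaussian galaxy property
conditioned on log halo mass (toy numbers, not a calibrated SHAM relation):

import numpy as np
from scipy.stats import norm

# Toy halo catalog.
mock = np.zeros(500, dtype=[('mvir', 'f8'), ('mstar', 'f8')])
mock['mvir'] = 10**np.random.uniform(11, 14, 500)

# P(mstar | log mvir): mean scales linearly with log halo mass.
P_xy = lambda y: norm(loc=0.7 * y + 1.0, scale=0.2)

mock = make_SHAM_mock(mock, P_xy, mock_prop='mvir', gal_prop='mstar')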
Example #50
    def _select_and_weight(self,
                           N=0,
                           gamma=-2,
                           source={
                               'ra': np.pi / 2,
                               'dec': np.pi / 6
                           },
                           time_profiles=None,
                           sampling_width=np.radians(1)):
        '''Prune the simulation set to only events close to a given source and calculate the
            weight for each event. Add the weights as a new column to the simulation set

            time_profiles should be a list of tuples. the first element in each tuple
            should be a time profile, and the second should be the proportion of total
            events in that time profile.
        '''
        assert 'ow' in self.sim.dtype.names
        assert time_profiles is not None

        # Pick out only those events that are close in
        # declination. We only want to sample from those.
        dec_dist = np.abs(source['dec'] - self.sim['trueDec'])
        close = dec_dist < sampling_width

        reduced_sim = rf.append_fields(self.sim[close].copy(),
                                       'weight',
                                       np.zeros(close.sum()),
                                       dtypes=np.float32)

        # Assign the weights using the newly defined "time profile"
        # classes above. If you want to make this a more complicated
        # shape, talk to me and we can work it out.
        reduced_sims = np.array([reduced_sim.copy() for _ in time_profiles])
        for i, time_profile in enumerate(time_profiles):
            effective_livetime = time_profile[0].effective_exposure()
            reduced_sims[i]['weight'] = time_profile[1]*reduced_sims[i]['ow'] *\
                                        N * (reduced_sims[i]['trueE']/100.e3)**gamma *\
                                        effective_livetime * 24 * 3600.

            # Apply the sampling width, which ensures that we
            # sample events from similar declinations.
            # When we do this, correct for the solid angle
            # we're including for sampling
            omega = 2*np.pi * (np.min([np.sin(source['dec']+sampling_width), 1]) -\
                               np.max([np.sin(source['dec']-sampling_width), -1]))
            reduced_sims[i]['weight'] /= omega
        return reduced_sims
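
The omega correction is the solid angle of the sampled declination band,
Omega = 2*pi*(sin(dec + width) - sin(dec - width)), clipped at the poles; a
standalone check with hypothetical numbers:

import numpy as np

dec, width = np.pi / 6, np.radians(1)
omega = 2 * np.pi * (min(np.sin(dec + width), 1) - max(np.sin(dec - width), -1))
print(omega)  # ~0.19 sr for a 1-degree band at dec = 30 deg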
Example #51
def appendSumKernels(csvData, columnPrefixes):
    '''Calculate the sum of matching pixels from different maps. Maps indicated by columnPrefixes. 
    Append sum as column to structured array'''

    #examine column headers for common kernels
    headers = []
    kernels = []
    for ind, prefix in enumerate(columnPrefixes):
        headers.append(
            list(
                filter(lambda x: x.startswith(prefix.upper()),
                       csvData.dtype.names)))
        kernels.append([])
        for i in headers[ind]:
            kernels[ind].append(i.split(prefix)[1])

    #confirm common kernels for all maps, then append sum for each kernel
    swap = np.transpose(kernels)
    check_common = all(len(set(row)) == 1 for row in swap)

    if check_common:
        #append new headers for sum calculation
        addHeaders = [
            "_".join(["SUM"] + columnPrefixes + [i.strip("_")])
            for i in kernels[0]
        ]
        csvData = append_fields(
            csvData,
            addHeaders,
            data=[np.zeros(csvData.size) for i in addHeaders],
            dtypes='f8')

        for ind, row in enumerate(csvData):
            for h in addHeaders:
                headers_to_sum = filter(lambda x: x.endswith(h[-1]),
                                        np.asarray(headers).flatten())
                total = 0
                for i in headers_to_sum:
                    total += int(row[i])
                csvData[h][ind] = total

    else:
        sys.exit("Cannot append Kernel Sum; Headers in unfamiliar format.")

    return csvData, '_'.join(addHeaders[0].split('_')[:-1])
Example #52
    def zlim_from_effi(self, effiInterp, zplot):
        """
        Method to estimate the redshift limit from efficiency curves
        The redshift limit is defined here as the redshift value beyond
        which the efficiency decreases towards zero.
        Parameters
        ---------------
        effiInterp: interpolator
          used to get efficiencies
        zplot: numpy array
          redshift values
        Returns
        -----------
        zlimit: float
          the redshift limit
        """

        # get efficiencies
        effis = effiInterp(zplot)
        # select data with efficiency decrease
        idx = np.where(np.diff(effis) < -0.005)[0]

        # Bail out if there is no data
        if np.size(idx) == 0:
            return 0

        z_effi = np.array(zplot[idx],
                          dtype={
                              'names': ['z'],
                              'formats': [float]
                          })
        # from this make some "z-periods" to avoid accidental zdecrease at low z
        z_gap = 0.05
        seasoncalc = np.ones(z_effi.size, dtype=int)
        diffz = np.diff(z_effi['z'])
        flag = np.where(diffz > z_gap)[0]

        if len(flag) > 0:
            for i, indx in enumerate(flag):
                seasoncalc[indx + 1:] = i + 2
        z_effi = rf.append_fields(z_effi, 'season', seasoncalc)

        # now take the highest season (end of the efficiency curve)
        idd = z_effi['season'] == np.max(z_effi['season'])
        zlimit = np.min(z_effi[idd]['z'])

        return zlimit
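
A hypothetical check of the redshift-limit logic using a synthetic efficiency
curve that plateaus near 1 and falls off around z ~ 0.6, with
scipy.interpolate.interp1d as the interpolator:

import numpy as np
from scipy.interpolate import interp1d

zplot = np.linspace(0.01, 1.0, 200)
effi = 1.0 / (1.0 + np.exp((zplot - 0.6) / 0.05))  # synthetic efficiency
effiInterp = interp1d(zplot, effi)
print(effiInterp(0.6))  # ~0.5 at the turnover

# zlim_from_effi is a method; on an instance `metric` it would be called as:
# zlim = metric.zlim_from_effi(effiInterp, zplot)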
Example #53
    def normalize_profile(self, params=None):

        ndata = self.data.copy()

        scale = 1.0 / np.mean(ndata[self.reference]["intensity"])

        ndata["intensity"] *= scale
        ndata["error"] *= abs(scale)

        if params is not None:
            values = self.calculate_profile(params) * scale
            ndata = rfn.append_fields(ndata,
                                      "intensity_calc",
                                      values,
                                      usemask=False)

        return ndata, scale
Example #54
    def neutralize_np(self, df, industry_set):
        from numpy.lib.recfunctions import append_fields
        for i in range(len(industry_set)):
            # Add an integer indicator column for this industry, default 0.
            df = append_fields(df,
                               industry_set[i],
                               [0] * df.size,
                               [int],
                               usemask=False)

            industry_stock = get_industry_stocks(industry_set[i])
            for j in range(df.size):
                if df['stock_code'][j] in industry_stock:
                    df[industry_set[i]][j] = 1

        return df
Example #55
def BASS_sensitivity_filter(path, data, rcat, survey, use_lognlogs=True):

    flux_arr = data['flux']
    n_rand = len(rcat)

    #generate random fluxes
    log_flux_grid = np.linspace(min(np.log10(flux_arr)),
                                max(np.log10(flux_arr)), 1000)

    if use_lognlogs is True:
        lognlogs = get_lognlogs(path)
        nlogn = 10**lognlogs(log_flux_grid)
        kdepdff = nlogn / np.sum(nlogn)

    else:
        kde = weighted_gaussian_kde(np.log10(flux_arr),
                                    bw_method=0.1,
                                    weights=None)
        kdepdff = kde.evaluate(log_flux_grid)

    log_fluxr_arr = generate_rand_from_pdf(pdf=kdepdff,
                                           num=n_rand,
                                           x_grid=log_flux_grid)
    fluxr_arr = 10**(log_fluxr_arr)

    rcat = append_fields(rcat, 'flux', fluxr_arr, usemask=False)

    smaps, wcses = get_BASSsmap(path + 'sensitivity_maps/', survey)

    #filter based on sensitivity
    good = []
    for i, r in enumerate(rcat):
        l = r['l']
        b = r['b']
        flux = r['flux']  #ergs/s/cm^2
        px, py, sind = BASSmap_ind(l, b, wcses)
        sens_map = smaps[sind]
        try:
            sensitivity = sens_map[px, py] * 2.39e-8 * 4.8  # in erg/s/cm^2
        except IndexError:
            print(l, b)
            continue
        if flux > sensitivity:
            good.append(i)
    randoms = rcat[np.array(good, dtype=int)]

    return randoms
Example #56
    def degree(self, get=False):
        """ Compute the undirected degree sequence.

        If get is true it returns the array otherwise it adds the result to v.
        """
        if 'degree' in self.v.dtype.names:
            degree = self.v.degree
        else:
            degree = mt.compute_degree(self.e, self.num_vertices)
            dtype = 'u' + str(mt.get_num_bytes(np.max(degree)))
            self.v = append_fields(self.v,
                                   'degree',
                                   degree.astype(dtype),
                                   dtypes=dtype)

        if get:
            return degree
Example #57
    def in_degree(self, get=False):
        """ Compute the out degree sequence.

        If get is true it returns the array otherwise it adds the result to v.
        """
        if 'in_degree' in self.v.dtype.names:
            d_in = self.v.in_degree
        else:
            d_out, d_in = mt.compute_in_out_degree(self.e, self.num_vertices)
            dtype = 'u' + str(
                mt.get_num_bytes(max(np.max(d_out), np.max(d_in))))
            self.v = append_fields(self.v, ['out_degree', 'in_degree'],
                                   (d_out.astype(dtype), d_in.astype(dtype)),
                                   dtypes=[dtype, dtype])

        if get:
            return d_in
Example #58
    def extrapolate_ws(self, h1, h):
        """
        Extrapolates a windspeed profile using the power law coefficient
        at `self.alpha`.

        Parameters
        ----------
        h1 : int | float
            Measurement height.
        h : int | float
            Desired profile height.
        """

        ts1 = self.state[f"windspeed_{h1}m"]
        ts = ts1 * (h / h1)**self.alpha

        self.state = append_fields(self.state, f"windspeed_{h}m", ts,
                                   usemask=False)
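
The extrapolation is the standard power-law wind profile, u(h) = u(h1) * (h/h1)**alpha.
A standalone check with hypothetical numbers and alpha = 0.14 (a common
neutral-stability assumption):

alpha = 0.14
ws_10, h1, h = 8.0, 10.0, 100.0
ws_100 = ws_10 * (h / h1)**alpha
print(round(ws_100, 2))  # ~11.04 m/s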
Example #59
    def _update_catalog_ephem(self):
        if not self._has_all_eph_keys(self._imeta):
            sys.stderr.write("_imeta missing required eph keys!\n")
            return
        tdata = self._imcat.copy()
        nobjs = len(tdata)
        for kk in _EPH_KEYS:
            vec = np.zeros(nobjs, dtype='float') + self._imeta[kk]
            if kk in tdata.dtype.names:
                sys.stderr.write("Column %s exists ... updating!\n" % kk)
                tdata[kk] = vec
            else:
                sys.stderr.write("Column %s not found ... adding!\n" % kk)
                tdata = append_fields(tdata, kk, vec, usemask=False)
        self._imcat = tdata
        return
Example #60
    def set_backexchange(self, back_exchange):
        """
        Sets the normalized percentage of uptake through a fixed backexchange value for all peptides.

        Parameters
        ----------
        back_exchange :  :obj:`float`
            Percentage of back exchange

        """

        back_exchange /= 100
        rfu = self.data['uptake'] / ((1-back_exchange)*self.data['ex_residues'])

        uptake_corrected = self.data['uptake'] / (1 - back_exchange)

        self.data = append_fields(self.data, ['rfu', 'uptake_corrected'], data=[rfu, uptake_corrected], usemask=False)
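
The two corrections follow directly from the definitions: rfu = uptake /
((1 - BE) * ex_residues) and uptake_corrected = uptake / (1 - BE). A numeric
sanity check with a hypothetical peptide:

back_exchange = 28 / 100
uptake, ex_residues = 3.6, 8.0
rfu = uptake / ((1 - back_exchange) * ex_residues)  # 0.625
uptake_corrected = uptake / (1 - back_exchange)     # 5.0
print(rfu, uptake_corrected)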