Example No. 1
    def __init__(self, da_sequence,
                 # TODO: pil_image_generator (better to use a generator here)
                 position=(0, 0),
                 colours=_colour.ImageColours(),
                 antialiasing=None,
                 make_pil_images_now=False,
                 multiprocessing=False):

        if not isinstance(colours, _colour.ImageColours):
            raise ValueError("Colours must be a ImageColours instance")

        self.da_sequence = da_sequence
        self.stimuli = []
        self.position = position
        self.antialiasing = antialiasing

        for da in self.da_sequence.dot_arrays:
            stim = ExprimentDotArray(dot_array=da, position=position,
                                     colours=colours,
                                     antialiasing=antialiasing)
            self.stimuli.append(stim)

        if make_pil_images_now:
            if not multiprocessing:
                list(map(lambda x: x._create_pil_image(), self.stimuli))
                self._make_image_process = None
            else:
                p = _Pool()

                for c, pil_im in enumerate(p.imap(ExpyrimentDASequence._make_stimuli_map_helper, self.stimuli)):
                    self.stimuli[c]._image = pil_im
                p.close()
                p.join()
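
A note on the pattern above: Pool.imap yields results in submission order, so return values can be assigned back to self.stimuli by index, and the work is dispatched through a static helper rather than a bound method. A minimal, self-contained sketch of that pattern follows; all names here (Renderer, _render_one) are illustrative, not taken from the library above.

# Hypothetical sketch of the imap-and-assign-back pattern used above.
from multiprocessing import Pool

class Renderer:
    def __init__(self, items):
        self.items = list(items)
        self.results = [None] * len(self.items)

    @staticmethod
    def _render_one(item):
        # Stand-in for the expensive per-element work (e.g. building a PIL image).
        return item ** 2

    def render_all(self, processes=4):
        with Pool(processes=processes) as p:
            # imap yields results in submission order, so index-based assignment is safe.
            for i, result in enumerate(p.imap(Renderer._render_one, self.items)):
                self.results[i] = result

if __name__ == '__main__':
    r = Renderer(range(8))
    r.render_all()
    print(r.results)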
Example No. 2
def gd2e(gd2e_table, project_name, num_cores, tqdm, cache_path):
    '''We should ignore stations_list as we already selected stations within merge_table'''
    # try:
    if gd2e_table[gd2e_table['file_exists'] == 0].shape[0] == 0:
        print('{} already processed'.format(project_name))
    else:
        gd2e_table = gd2e_table[gd2e_table['file_exists'] == 0].to_records()
        # convert to records: multiprocessing pickles plain record rows, not a pandas DataFrame
        num_cores = num_cores if gd2e_table.shape[
            0] > num_cores else gd2e_table.shape[0]
        print(
            'Processing {} |  # files left: {} | Adj. # of threads: {}'.format(
                project_name, gd2e_table.shape[0], num_cores))

        with _Pool(processes=num_cores) as p:
            if tqdm:
                list(
                    _tqdm.tqdm_notebook(p.imap(_gd2e, gd2e_table),
                                        total=gd2e_table.shape[0]))
            else:
                p.map(_gd2e, gd2e_table)  # map is eager; list() above is only needed to drive imap

    # except:
    print('cleaning cached IONEX from RAM before exiting')
    # cleaning up after execution
    IONEX_cached_path = _os.path.join(cache_path, 'IONEX_merged')
    _rmtree(IONEX_cached_path)
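
The .to_records() step above is the key detail: Pool workers receive pickled items, and a numpy record array hands each worker one plain row while keeping named-field access. A toy, self-contained sketch of that hand-off (process_row and the column names are stand-ins, not the _gd2e signature):

# Hypothetical sketch: hand a DataFrame to Pool workers row by row via to_records().
from multiprocessing import Pool
import pandas as pd

def process_row(rec):
    # Each worker receives one numpy record; fields are accessed by name.
    return '{} -> {}'.format(rec.station, rec.path)

def run(table, num_cores=4):
    records = table[table['file_exists'] == 0].to_records()  # picklable rows
    num_cores = min(num_cores, len(records)) or 1
    with Pool(processes=num_cores) as p:
        return p.map(process_row, records)

if __name__ == '__main__':
    df = pd.DataFrame({'station': ['ALIC', 'HOB2'],
                       'path': ['/tmp/alic.dr', '/tmp/hob2.dr'],
                       'file_exists': [0, 0]})
    print(run(df))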
Example No. 3
def _pool_map(n_core, parallel_function, n_sample_u, shape_latent,
              shared_latent):
    with _Pool(processes=n_core,
               initializer=_init_parallel,
               initargs=(shared_latent, shape_latent)) as pool:
        pool.map(parallel_function, iterable=_np.arange(n_sample_u))
        latent = _np.frombuffer(shared_latent,
                                dtype=_np.float64).reshape(shape_latent)
    return latent
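
For reference, a self-contained sketch of the shared-buffer pattern _pool_map relies on: a ctypes array is handed to each worker through the Pool initializer, the workers write into it in place, and the parent reinterprets the same buffer with np.frombuffer. The names _init_worker and _fill_row are illustrative; _init_parallel and parallel_function above presumably play these roles.

# Hypothetical sketch of the shared-memory Pool pattern used in _pool_map.
import multiprocessing as mp
import numpy as np

_shared = None  # set in each worker by the initializer
_shape = None

def _init_worker(shared_buf, shape):
    global _shared, _shape
    _shared = shared_buf
    _shape = shape

def _fill_row(i):
    # View the shared buffer as an array and write one row; nothing is copied back.
    arr = np.frombuffer(_shared, dtype=np.float64).reshape(_shape)
    arr[i, :] = i

def pool_fill(n_rows, n_cols, n_core=4):
    shape = (n_rows, n_cols)
    shared_buf = mp.RawArray('d', n_rows * n_cols)
    with mp.Pool(processes=n_core, initializer=_init_worker,
                 initargs=(shared_buf, shape)) as pool:
        pool.map(_fill_row, range(n_rows))
    return np.frombuffer(shared_buf, dtype=np.float64).reshape(shape)

if __name__ == '__main__':
    print(pool_fill(4, 3))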
Example No. 4
def _gather_tdps(station_files, num_cores, tqdm):
    '''Runs the tdp extraction (get_tdps_pandas / numpy) in parallel'''
    num_cores = num_cores if station_files.shape[0] > num_cores else station_files.shape[0]
    # chunksize = int(np.ceil(len(station_files) / num_cores))  # 20-30 is the best
    chunksize = 20

    with _Pool(processes=num_cores) as p:
        if tqdm:
            data = list(_tqdm.tqdm_notebook(p.imap(_get_tdps_npz, station_files, chunksize=chunksize),
                                            total=station_files.shape[0]))
        else:
            data = p.map(_get_tdps_npz, station_files, chunksize=chunksize)
    return data
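
The explicit chunksize above is the knob worth noting: with many small tasks, larger chunks mean fewer pickling round-trips between the parent and the workers. A minimal sketch of imap with a chunksize (extract is a stand-in for _get_tdps_npz; the tqdm wrapper above only adds a progress bar around the same iterator):

# Hypothetical sketch: Pool.imap with an explicit chunksize, as in _gather_tdps.
from multiprocessing import Pool

def extract(path):
    # Stand-in for the per-file extraction work.
    return len(path)

def gather(paths, num_cores=4, chunksize=20):
    num_cores = min(num_cores, len(paths)) or 1
    with Pool(processes=num_cores) as p:
        data = []
        # The work is driven by consuming the iterator, with or without a progress bar.
        for result in p.imap(extract, paths, chunksize=chunksize):
            data.append(result)
    return data

if __name__ == '__main__':
    print(gather(['/tmp/a.npz', '/tmp/bb.npz', '/tmp/ccc.npz']))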
Example No. 5
def igs2jpl(begin,end,products_type,products_dir,tqdm,num_cores=None,run_dir = '/run/user/1017/'):
    #products_dir = '/mnt/data/bogdanm/Products/CODE/source/MGEX/'
    sets = _gen_sets(begin,end,products_type,products_dir,run_dir = run_dir)
    sets = sets.to_records()
    
    with _Pool(num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_sp3ToPosTdp, sets), total=sets.shape[0]))
        else:
            p.map(_sp3ToPosTdp, sets)

    tmp_dir = _os.path.join(run_dir, 'tmp_igs2jpl')  # tmp directory the worker processes worked in
    try:
        _rmtree(tmp_dir)  # removing the tmp directory after processing
    except OSError:
        print('Could not remove tmp')
Example No. 6
def ce2cm(init_ce_path,num_cores = 10,tqdm=True):
    cache='/run/user/1017/'
    cache_path = _os.path.join(cache,'ce2cm_cache')
    if not _os.path.exists(cache_path): _os.makedirs(cache_path)
    
    init_ce_path = _os.path.abspath(init_ce_path) 
    cm_dirname = _os.path.basename(init_ce_path)+'_cm'
    init_cm_path = _os.path.join(_os.path.dirname(init_ce_path),cm_dirname)
    if _os.path.exists(init_cm_path):
        print('CM folder exists. Removing.')
        _rmtree(init_cm_path)
    print('Copying {} to {}'.format(_os.path.basename(init_ce_path),cm_dirname))
    
    
#     dst = _copytree(src=init_ce_path,dst=init_cm_path)
    print('Finished copying to {}'.format(init_cm_path))
#     pos_files = _glob.glob(init_cm_path+'/*/*pos.gz')
#     print('Found {} pos files. Running'.format(len(pos_files)))
    
    #files to make symlinks
    product_files = _pd.Series(_glob.glob(init_ce_path+'/*/*.gz'))
    product_file_names_df = product_files.str.split('/',expand=True).iloc[:,-1].str.split('.',expand=True)
    symlink_files = product_files[product_file_names_df[1] != 'pos'].to_list()
    # files to copy (.pos)
    pos_files = product_files[product_file_names_df[1] == 'pos'].to_list()
    
    basedir = _os.path.abspath(_os.path.join(symlink_files[0],_os.pardir,_os.pardir,_os.pardir))
    files_symlinks = _pd.Series(symlink_files).str.split('/',expand=True).iloc[:,-3:]
    symlink_src = (basedir + '/' + files_symlinks.iloc[:,0]+'/'+files_symlinks.iloc[:,1]+'/'+files_symlinks.iloc[:,2])
    symlink_dst = (basedir + '/' + files_symlinks.iloc[:,0]+'_cm/'+files_symlinks.iloc[:,1]+'/'+files_symlinks.iloc[:,2])

    year_dirs = basedir + '/' + files_symlinks.iloc[:,0][0]+'_cm/' + files_symlinks.iloc[:,1].unique()
    for dir_path in year_dirs:
        if not _os.path.exists(dir_path): _os.makedirs(dir_path)
    print('creating symlinks for products files (except for *.pos.gz)')
    for i in range(len(symlink_src)):
        _os.symlink(src=_os.path.relpath(path=symlink_src[i],start=_os.path.dirname(symlink_dst[i])),dst=symlink_dst[i])
    
    files_pos = _pd.Series(pos_files).str.split('/',expand=True).iloc[:,-3:]
    pos_src = (basedir + '/' + files_pos.iloc[:,0]+'/'+files_pos.iloc[:,1]+'/'+files_pos.iloc[:,2])
    pos_dst = (basedir + '/' + files_pos.iloc[:,0]+'_cm/'+files_pos.iloc[:,1]+'/'+files_pos.iloc[:,2])
    cache_path_series = _np.ndarray(pos_src.shape,dtype=object)
    cache_path_series.fill(cache_path)
    pos_path_series = _pd.concat([pos_src,pos_dst,_pd.Series(cache_path_series)],axis=1).values
#     return pos_path_series
    with _Pool(processes=num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_ce2cm_single_thread, pos_path_series), total=len(pos_path_series)))
        else:
            p.map(_ce2cm_single_thread, pos_path_series)
    _rmtree(path=cache_path)
Example No. 7
def dr_merge(merge_table, num_cores, tqdm):
    '''merge_table is the output of get_merge_table(). Merges everything of completeness class 3, as merge_table stores only files that are actually present'''
    num_cores = int(num_cores)  #safety precaution if str value is specified
    df_class3 = merge_table[['begin', 'path_prev', 'path', 'path_next'
                             ]][merge_table['completeness'] == 3].copy()

    df_class3['merge_begin'] = (
        df_class3['begin'].astype('datetime64[D]') -
        _np.timedelta64(3, '[h]') -
        J2000origin).astype('timedelta64[s]').astype(int)
    df_class3['merge_end'] = (df_class3['begin'].astype('datetime64[D]') +
                              _np.timedelta64(27, '[h]') -
                              J2000origin).astype('timedelta64[s]').astype(int)
    # merging to 2:55:00 df_class3['merge_end'] = (df_class3['begin'].astype('datetime64[D]')  + _np.timedelta64( 27,'[h]') - _np.timedelta64( 5,'[m]') -J2000origin).astype('timedelta64[s]').astype(int)

    merge_table_class3 = df_class3[[
        'merge_begin', 'merge_end', 'path_prev', 'path', 'path_next'
    ]]

    # check if merged version already exists
    ifexists = _np.zeros((merge_table_class3.shape[0]))
    merged_paths = merge_table_class3['path'] + '.30h'

    for i in range(merged_paths.shape[0]):
        ifexists[i] = _os.path.isfile(merged_paths.values[i])
    ifexists = ifexists.astype(bool)

    merge_table_class3_run = merge_table_class3[~ifexists]
    if (merge_table_class3[~ifexists]).shape[0] == 0:
        print('All merge files present')

    else:
        num_cores = num_cores if merge_table_class3_run.shape[
            0] > num_cores else merge_table_class3_run.shape[0]

        print('Number of files to merge:', merge_table_class3_run.shape[0],
              '| Adj. num_cores:', num_cores)

        with _Pool(processes=num_cores) as p:
            if tqdm:
                list(
                    _tqdm.tqdm_notebook(p.imap(
                        _merge, merge_table_class3_run.to_records()),
                                        total=merge_table_class3_run.shape[0]))
            else:
                p.map(_merge, merge_table_class3_run.to_records())
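
J2000origin is not defined in these excerpts; in GipsyX-related tooling it is conventionally the J2000 epoch, 2000-01-01 12:00:00. Assuming that, the window arithmetic in dr_merge reduces to the sketch below: a 30-hour span from 21:00 of the previous day to 03:00 of the next day, expressed in seconds past J2000.

# Hypothetical sketch of the J2000-seconds window computed in dr_merge,
# assuming J2000origin is the J2000 epoch (not shown in these excerpts).
import numpy as np

J2000origin = np.datetime64('2000-01-01 12:00:00')

day = np.datetime64('2021-03-05').astype('datetime64[D]')
merge_begin = ((day - np.timedelta64(3, 'h')) - J2000origin).astype('timedelta64[s]').astype(int)
merge_end = ((day + np.timedelta64(27, 'h')) - J2000origin).astype('timedelta64[s]').astype(int)

print(merge_begin, merge_end, (merge_end - merge_begin) / 3600.0)  # window length is 30.0 hours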
Example No. 8
def gen_penna_tdp(tmp_path,
            staDb_path,
            tqdm,
            period=13.9585147, # Penna, N. T. et al. (2015) p.6526
            num_cores = 25,
            A_East=2, A_North=4, A_Vertical=6):
    '''
    1. Read staDb file (staDb has to have information on all the stations in the dataset)
    2. Extract stations names and positions. Create rot for each station.
    3. Glob all tdp files
    4. Loop through the tdp files list. Read each file. All years and DOYs that are present in the directory!!!
    5. For each file extract time values. Generate synth waves.
    6. Rotate for each station [in staDb?] and create tdp output lines for each station. staDb is generated on the fly from the list of stations fetched
    7. Concatenate outputs and append to the input tdp file
    '''
    files = _np.asarray(sorted(_glob.glob(tmp_path+'/tropNom/*/*/30h_tropNominalOut_VMF1.tdp')))
    num_cores = num_cores if len(files) > num_cores else len(files)
    
    ref_xyz_df = get_ref_xyz(staDb_path)
    rot = get_rot(ref_xyz_df)

    aux = _np.empty((files.shape[0],6),dtype = object) #population array [xyz_staDb_data, period, A_East, A_North, A_Vertical, rot]
    aux[:] = [ref_xyz_df,period,A_East, A_North, A_Vertical,rot]

    print('Number of files to be processed:', len(files),
          '\nAdjusted number of cores:', num_cores)
    np_set = _np.column_stack((files,aux))
    '''
    np_set[0]
    ['/mnt/Data/bogdanm/tmp_GipsyX/tropNom/2003/001/30h_tropNominalOut_VMF1.tdp',
         Station             X            Y             Z
    0    BRAE  3.475467e+06 -206213.0000  5.326645e+06
    1    LOFT  3.706041e+06  -55853.0000  5.173496e+06
    2    WEAR  3.686877e+06 -143592.0000  5.185648e+06
    3    CAMB  4.071647e+06 -379677.1000  4.878479e+06
    4    HERT  4.033461e+06   23537.6625  4.924318e+06
    5    LERW  3.183055e+06  -65838.5000  5.508326e+06
    6    NEWL  4.079954e+06 -395930.4000  4.870197e+06
    7    SHEE  3.983074e+06   51683.0000  4.964640e+06,
       13.9585147, 2, 4, 6]'''

    with _Pool(processes=num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_gen_penna_tdp_file, np_set), total=np_set.shape[0]))
        else:
            p.map(_gen_penna_tdp_file, np_set)
Example No. 9
def create(index, name=config.get("cloudmesh.azure.username")):
    global vmName, vmImage, vmPassword
    vmImage = config.get("cloudmesh.azure.image")
    userName = config.get("cloudmesh.azure.username")
    vmPassword = config.get("cloudmesh.azure.password")
    vmName = name

    if vmImage == "":
        print ("image name not set, use set:image,value)")
    elif userName == "":
        print ("image name not set, use set:username,value)")
    elif vmPassword == "":
        print ("image name not set, use set:password,value)")
    else:
        """Creates a number of vms with the labels prefix-0 to prefix-<number-1>. It uses a threadpool"""
        pool = _Pool(processes=maxparallel)
        vm_indices = range(0, int(index))
        result = pool.map(_boot, vm_indices)
        # _boot(index)
        print(result)
Example No. 10
def create(index, name=config.get('azure', 'username')):
    global vmName, vmImage, vmPassword
    vmImage = config.get('azure', 'image')
    userName = config.get('azure', 'username')
    vmPassword = config.get('azure', 'password')
    vmName = name

    if(vmImage == ''):
        print('image name not set, use set:image,value)')
    elif(userName == ''):
        print('username not set, use set:username,value)')
    elif(vmPassword == ''):
        print('password not set, use set:password,value)')
    else:
        """Creates a number of vms with the labels prefix-0 to prefix-<number-1>. It uses a threadpool"""
        pool = _Pool(processes=maxparallel)
        vm_indices = range(0, int(index))
        result = pool.map(_boot, vm_indices)
        # _boot(index)
        print(result)
Example No. 11
def create(index, name=config.get('cloudmesh.azure.username')):
    global vmName, vmImage, vmPassword
    vmImage = config.get('cloudmesh.azure.image')
    userName = config.get('cloudmesh.azure.username')
    vmPassword = config.get('cloudmesh.azure.password')
    vmName = name

    if (vmImage == ''):
        print('image name not set, use set:image,value)')
    elif (userName == ''):
        print('username not set, use set:username,value)')
    elif (vmPassword == ''):
        print('password not set, use set:password,value)')
    else:
        """Creates a number of vms with the labels prefix-0 to prefix-<number-1>. It uses a threadpool"""
        pool = _Pool(processes=maxparallel)
        vm_indices = range(0, int(index))
        result = pool.map(_boot, vm_indices)
        # _boot(index)
        print(result)
Example No. 12
def jpl2merged_orbclk(begin,end,GNSSproducts_dir,num_cores=None,h24_bool=True,makeShadow_bool=True,tqdm=True,run_dir = '/run/user/1017/'):
    '''GipsyX can only merge daily products so effectively we end up having 3 days merged (72 hours, centre 24 +- 24) and not 30 hours'''
    begin64 = _np.datetime64(begin).astype('datetime64[D]')
    end64 = _np.datetime64(end).astype('datetime64[D]')
    products_day = _np.arange(begin64,end64)
    products_begin = ((products_day - _np.timedelta64(3,'h')) - _J2000origin).astype(int)
    products_end = (products_day + _np.timedelta64(27,'h') - _J2000origin).astype(int)
    #rewriting 1st and last values. These are 27 hour products precisely according to boundaries specified
    products_begin[0] = (products_day[0] - _J2000origin).astype(int)
    products_end[-1] = (products_day[-1] + _np.timedelta64(24,'h') - _np.timedelta64(5,'m')- _J2000origin).astype(int)

    year_str =  (_pd.Series(products_day).dt.year).astype(str)
    
    output_merged_dir = _os.path.abspath(GNSSproducts_dir)
    target_path = _os.path.abspath(_os.path.join(output_merged_dir,_os.pardir,_os.pardir,'init',_os.path.basename(output_merged_dir)))
    if _os.path.exists(target_path):
        _rmtree(target_path)
        
    target_dir = target_path +'/' + year_str
    for year_dir in target_dir.unique():  # creating folder structure before conversion
        _os.makedirs(year_dir)
    
    repository = _np.ndarray((products_day.shape),object)
    h24 = _np.ndarray((products_day.shape),bool)
    makeShadow = _np.ndarray((products_day.shape),bool)
    
    tmp_merge_path = _os.path.abspath(run_dir)+ '/tmp_merge/'
    run = tmp_merge_path +_pd.Series(products_day).astype(str)
    # Need to clear run before new execution just in case
    if _os.path.exists(tmp_merge_path) : _rmtree(tmp_merge_path)
  
    repository.fill(GNSSproducts_dir)
    h24.fill(h24_bool)
    makeShadow.fill(makeShadow_bool)
    
    input_sets = _np.column_stack([products_begin,products_end,repository,target_dir,h24,makeShadow,products_day,run])

    with _Pool(processes=num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_gen_orbclk, input_sets), total=input_sets.shape[0]))
        else:
            p.map(_gen_orbclk, input_sets)
    _rmtree(tmp_merge_path)  # cleaning up
Example No. 13
def rnx2dr(selected_df, num_cores, tqdm, cache_path, staDb_path, cddis=False):
    '''Runs rnxEditGde.py for each file in the class object in multiprocessing'''
    #Checking files that are already in place so not to overwrite
    print('staDb_path:', staDb_path)
    if_exists_array = _np.ndarray((selected_df.shape[0]), dtype=bool)
    for i in range(if_exists_array.shape[0]):
        if_exists_array[i] = not _os.path.exists(selected_df['dr_path'][i])
    selected_df = selected_df[if_exists_array]

    selected_df2convert = selected_df[['rnx_path', 'dr_path']].copy()
    selected_df2convert[
        'cache_path'] = cache_path  #populating df with cache path value
    selected_df2convert[
        'staDb_path'] = staDb_path  #populating with staDb_path which is needed as rnx files may lack receiver information
    selected_df2convert = selected_df2convert.values

    if selected_df2convert.shape[0] > 0:
        num_cores = num_cores if selected_df2convert.shape[
            0] > num_cores else selected_df2convert.shape[0]
        print('Number of files to process:',
              selected_df2convert.shape[0],
              '| Adj. num_cores:',
              num_cores,
              end=' ')

        with _Pool(processes=num_cores) as p:
            if tqdm:
                list(
                    _tqdm.tqdm_notebook(p.imap(_2dr, selected_df2convert),
                                        total=selected_df2convert.shape[0]))
            else:
                p.map(_2dr, selected_df2convert)
    else:
        # If the array of unconverted files is empty, nothing is converted
        print('Nothing to convert. All available rnx files are already converted')
Example No. 14
    def get_ionex_data(self, in_sets):
        num_cores = self.num_cores if len(in_sets) > self.num_cores else len(in_sets)
        chunksize = int(_np.ceil(len(in_sets)/num_cores))
        # Collecting ionex maps from multiple files in parallel.
        # GIM_data_extraction expects only an array with file paths (columns 3 and 4 of in_sets).
        with _Pool(num_cores) as p:
            if self.tqdm:
                GIM_data = list(_tqdm.tqdm_notebook(p.imap(GIM_data_extraction, in_sets[:,[3,4]]), total=in_sets.shape[0]))
            else:
                GIM_data = p.map(GIM_data_extraction, in_sets[:,[3,4]])

        # Drop each file's trailing 00h epoch, which duplicates the first epoch of the next file
        for i in range(len(GIM_data)-1):
            if (GIM_data[i][0].tail(1).dt.hour == 0).iloc[0]:
                GIM_data[i].drop(GIM_data[i][0].tail(1).index[0],inplace = True)

        # Merging all data into two arrays (TEC and RMS) 
        data_GIM_final = _pd.DataFrame()
        for element in GIM_data:
            data_GIM_final = _pd.concat((data_GIM_final,element[[1,2]]))
        # Resulting array with two columns
        return data_GIM_final.values
Example No. 15
def gather_metadata(logs_glob_path = '/data/station_logs/station_logs_IGS/*/*.log',
                    rnx_glob_path=None,
                    num_threads=1):
    '''Parses log files found with the glob expression'''
    parsed_filenames = find_recent_logs(logs_glob_path=logs_glob_path,
                                        rnx_glob_path=rnx_glob_path).values

    total = parsed_filenames.shape[0]
    if num_threads == 1:
        gather = []
        for file in tqdm(parsed_filenames,miniters=total//100,total=total):
            gather.append(parse_igs_log(file))
    else:
        with _Pool(processes=num_threads) as pool:
            gather = list(tqdm(pool.imap_unordered(parse_igs_log, parsed_filenames),
                               total=total, miniters=total//100))

    gather_raw = _np.concatenate(gather)

    rec_ant_mask = gather_raw[:, 0] != 0  # id_loc = 0, rec = 1, ant = 2
    gather_id_loc = gather_raw[~rec_ant_mask][:, 1:]
    gather = gather_raw[rec_ant_mask]

    stacked_rec_ant_dt = _np.concatenate(
        [gather[:, -3], gather[:, -2]], axis=0)

    stacked_rec_ant_dt = igslogdate2datetime64(stacked_rec_ant_dt)
    snx_date = datetime2yydoysec(stacked_rec_ant_dt)

    gather = _np.concatenate([gather, snx_date.reshape(2, gather.shape[0]).T], axis=1)
    stacked_rec_ant_dt_beg_end = stacked_rec_ant_dt.reshape(2, gather.shape[0])  # the strict comparison (no equal sign) also deals with NaNs
    # same goes for an empty station start date, which becomes year 2100
    valid_mask_dt = stacked_rec_ant_dt_beg_end[0] < stacked_rec_ant_dt_beg_end[1]

    bad_dt_stations = _np.unique(gather[~valid_mask_dt][:, 1])

    rec_mask = gather[:, 0] == 1
    rec_df = _pd.DataFrame( _np.delete(arr=gather[rec_mask], axis=1, obj=[0, 6, 7, 8]),
                            columns=['CODE','RECEIVER','GNSS','S/N','FW','BEGIN_RAW','END_RAW',
                                    'PATH','BEGIN_SNX','END_SNX'])
    ant_df = _pd.DataFrame( gather[~rec_mask][:, 1:],
                            columns=['CODE','ANTENNA','RADOME','S/N','EccU','EccN','EccE',
                                    'RADOME2','BEGIN_RAW','END_RAW','PATH','BEGIN_SNX', 'END_SNX'])

    # ID LOC
    id_loc_df = _pd.DataFrame(gather_id_loc,columns=['CODE','DOMES_N','CITY','COUNTRY',
                                                    'X','Y','Z','LAT','LON','HEI','PATH'])

    id_loc_df.CITY[id_loc_df.CITY == ''] = 'N/A'
    id_loc_df.CITY = id_loc_df.CITY.str.rstrip().str.upper()
    id_loc_df.COUNTRY = translate_series(id_loc_df.COUNTRY.str.rstrip().str.upper(),
                                        translation_country).values
    id_loc_df.DOMES_N[id_loc_df.DOMES_N == ''] = '---------'


    xyz_array = (id_loc_df[['X','Y','Z']].stack()
                            .str.replace(',','.')
                            .replace({'':None})
                            .unstack().values.astype(float))


    valid_mask = _np.all((( xyz_array != 0) & ~_np.isnan(xyz_array)),axis=1)

    xyz_norm = (xyz_array[valid_mask] ** 2).sum(axis=1) **0.5
    valid_mask[valid_mask] = (xyz_norm > 6000000) &(xyz_norm < 6500000)


    llh = xyz2llh_heik(xyz_array[valid_mask],deg=True)
    llh_snx = llh2snxdms(llh)

    llh2  = id_loc_df[~valid_mask][['LAT','LON','HEI']]
    llh2_snx = logllh2snxdms(llh2)
    snxdms = _np.empty(valid_mask.shape,dtype=object)
    snxdms[valid_mask] = llh_snx
    # snxdms[valid_mask] =' 000 00 00.0  00 00 00.0   000.0'
    snxdms[~valid_mask] = llh2_snx#
    # snxdms[~valid_mask] = ' 000 00 00.0  00 00 00.0   000.0'#llh2_snx
    # bad_loc_stations = id_loc_df.CODE[snxdms == ''].values
    id_loc_df['LOC'] = snxdms

    ecc = ant_df[['EccU','EccN','EccE']].values
    ecc[ecc == ''] = 0
    ant_df[['EccU','EccN','EccE']] = ecc.astype(float)

    rec_df.RECEIVER = rec_df.RECEIVER.str.rstrip().str.upper()
    ant_df.ANTENNA  = ant_df.ANTENNA.str.rstrip().str.upper()
    ant_df.RADOME   = ant_df.RADOME.str.rstrip().str.upper()
    ant_df.RADOME2  = ant_df.RADOME2.str.rstrip().str.upper()

    no_rad2_mask = ~ant_df.RADOME.isin(atx_rad_tbl)
    ant_df.RADOME[no_rad2_mask] = ant_df.RADOME2[no_rad2_mask]
    # translation_ant.index.name= None
    antennas = translate_series(ant_df.ANTENNA,translation_ant)
    invalid_ant_mask = ~antennas.index.isin(atx_ant_tbl)
    bad_antenna_stations = ant_df[invalid_ant_mask]['CODE'].unique()

    receivers = translate_series(rec_df.RECEIVER,translation_rec)
    invalid_rec_mask = ~receivers.index.isin(igs_rec_tbl)
    bad_rec_stations = rec_df[invalid_rec_mask]['CODE'].unique()

    radomes = translate_series(ant_df.RADOME,translation_rad)

    invalid_radomes_mask = ~radomes.index.isin(atx_rad_tbl)
    bad_radome_stations = ant_df[invalid_radomes_mask]['CODE'].unique()

    ant_df.ANTENNA  = antennas.values
    ant_df.RADOME   = radomes.values
    rec_df.RECEIVER = receivers.values

    bad_stations = _np.unique(bad_dt_stations.tolist()
                              + bad_radome_stations.tolist()
                              + bad_antenna_stations.tolist()
                              + bad_rec_stations.tolist())

    rec_df = rec_df[~rec_df.CODE.isin(bad_stations)].copy()
    ant_df = ant_df[~ant_df.CODE.isin(bad_stations)].copy()
    id_loc_df = id_loc_df[~id_loc_df.CODE.isin(bad_stations)].copy()

    return id_loc_df,rec_df,ant_df
Example No. 16
        while 1 :
            var = raw_input("Image : ")
            if images.has_key(var) :
                print images[var][0]
                return images[var][0]
            else :
                print "Incorrect Image name"
        return var


    def create(self, index, image=None, name=None):

        if(image != None):
            """Creates a number of vms with the labels prefix-0 to prefix-<number-1>. It uses a threadpool"""
            pool = _Pool(processes=maxparallel)
            list = range(0, int(index))
            result = pool.map(_boot, list)

            print result

    def _boot(self, index, name):

        #cmd = 'azure vm create %(vmname)s %(image)s %(username)s --ssh --location "East US" %(password)s' % vm
        #print cmd
        result = _vm_create("%s" % vm_name(index),
                           "%s" % name,
                           "%(username)s" % self.credentials,
                           "--ssh",
                           "--location",
                           "East US",
Example No. 17
    def build(
        self,
        build_all: bool = False,
        target_list: _Optional[list] = None,
        number_of_threads: _Optional[int] = None,
    ):
        """Build targets of this project.

        By default, this function builds all targets in this project as well
        as all their dependencies. This function will configure all targets
        that haven't been configured in a previous call.

        Parameters
        ----------
        build_all : bool
            If set to true, will not only build all targets in this project
            and their dependencies, but also all targets of all sub-projects.
        target_list : list
            If given, will build all targets in this project that are in the
            given list, as well as all their dependencies.
        number_of_threads : int
            If given will compile targets with the given number of threads. Otherwise
            it will use the default number of CPU cores visible to Python.

        """
        # Get targets to build
        targets_to_build = self._get_targets_to_build(build_all, target_list)

        # Sort targets in build order
        build_list = [
            target for target in reversed(
                list(_nx.topological_sort(self._project_tree)))
            if (target in targets_to_build or build_all) and
            not isinstance(self._project_tree.nodes[target]["data"], Project)
        ]

        # Get project sources, if any
        project_build_list = []
        for target_description in build_list:
            project_build_list.append(target_description.parent_project)
        project_build_list = list(dict.fromkeys(project_build_list))
        for project in project_build_list:
            project.get_sources()

        ### Note: the project_tree needs to be updated directly for dependencies
        ### to be used correctly in the `_target_from_description` function
        target_build_list = []
        for list_entry in build_list:
            if isinstance(list_entry, _TargetDescription):
                target = self._target_from_description(
                    self._project_tree.nodes[list_entry]["data"])
                if target:
                    target_build_list.append(target)
                self._project_tree.nodes[list_entry]["data"] = target
            elif isinstance(list_entry, _Target):
                target_build_list.append(list_entry)
            else:
                error_message = self.parent.log_message(
                    f"Found {list_entry} in target list, which cannot be used because"
                    " it is not derived from Target or TargetDescription.")

                self._logger.exception(error_message)
                raise RuntimeError(error_message)

        if not target_build_list:
            self._logger.info("No targets to be built")
        else:
            self._logger.info(
                f"Building {', '.join([str(target) for target in target_build_list])}"
            )

        # Compile
        with _Pool(processes=number_of_threads) as process_pool:
            for target in target_build_list:
                target.compile(process_pool, False)

        # Link
        for target in target_build_list:
            target.link()

        # Bundle
        if self._environment.bundle:
            with _Pool(processes=number_of_threads) as process_pool:
                for target in target_build_list:
                    target.bundle()

        # Redistributable bundle
        if self._environment.redistributable:
            with _Pool(processes=number_of_threads) as process_pool:
                for target in target_build_list:
                    target.redistributable()
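
The build order above comes from a reversed topological sort of the project tree, so every dependency is compiled before the targets that need it. A small sketch of just that ordering step, assuming edges point from a target to its dependencies as the code above implies:

# Hypothetical sketch of the reversed topological sort used to get a dependency-first build order.
import networkx as nx

project_tree = nx.DiGraph()
# Edges point from a target to what it depends on.
project_tree.add_edges_from([
    ('app', 'libcore'),
    ('app', 'libio'),
    ('libio', 'libcore'),
])

build_order = list(reversed(list(nx.topological_sort(project_tree))))
print(build_order)  # ['libcore', 'libio', 'app'] -- dependencies come before dependents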
Example No. 18
def gen_tropnom(tmp_dir,staDb_path,rate,VMF1_dir,num_cores):
    '''
    Generates tropnominal files for valid stations in the staDb file. Takes the number of years from dr_info.npz.
    Had to create an additional for loop as file no 31 gives an error, no matter what year it is (tropNom read error of VMF1 file). A tdp file is created for each observation file.
    '''
    num_cores = int(num_cores)

    #Creates a staDb object
    staDb=_StationDataBase.StationDataBase(dataBase = staDb_path) #creating staDb object
    stns = staDb.getStationList() #creating array with available station names
    print(len(stns),'sites found in staDb:',stns) #verbal output of stations that will be present in tropNom files
    drinfo_file = _dump_read(filename='{}/{}/{}.zstd'.format(tmp_dir,rnx_dr_lbl,drInfo_lbl))
    drinfo_years_list = drinfo_file.begin.dt.year.unique()

    #creating folder and file structure taking into account leap year.
    #resulting paths look as follows: year/doy/30h_tropNominal.vmf1
    #data on next day needed to create current day tropnominal
    days_in_year=_np.ndarray((len(drinfo_years_list)),dtype=int)
    current_year = _np.datetime64('today').astype('datetime64[Y]').astype(str).astype(int)
    for i in range(len(drinfo_years_list)):
        # VMF1 data is incomplete for the current year (if it is not a prediction),
        # so an additional check of the files present is needed
        if int(drinfo_years_list[i]) != current_year:         
            days_in_year[i] = int(365 + (1*_calendar.isleap(drinfo_years_list[i])))
            date = (_np.datetime64(str(drinfo_years_list[i])) + (_np.arange(days_in_year[i]).astype('timedelta64[D]')))
            #Now all works correctly. The bug with wrong timevalues was corrected.

        else: 
            current_year_VMF1_dir_ah = _os.path.join(VMF1_dir, str(current_year),'ah')
            last_ah_file_path = sorted(_glob.glob(current_year_VMF1_dir_ah+'/*'))[-1]
            # e.g ah19315.h18.gz
            
            last_ah_filename = _os.path.basename(last_ah_file_path)
            last_day_used = int(last_ah_filename[4:7]) - 1  # -1 because the next day's data is needed to build a 30h tropnominal
            date = _np.datetime64(str(current_year)) + (_np.arange(last_day_used).astype('timedelta64[D]'))
            print('Last VMF1 day in {} is {}. Generating up to {}'.format(str(current_year),last_ah_filename[4:7],str(last_day_used)))

        begin = ((date - J2000origin) - _np.timedelta64(3,'[h]')).astype(int) 
        end = ((date - J2000origin) + _np.timedelta64(27,'[h]')).astype(int) 

        tropNom_out = (tmp_dir +'/tropNom/'+ str(drinfo_years_list[i])+'/'+_pd.Series(date).dt.dayofyear.astype(str).str.zfill(3)+'/30h_tropNominalOut_VMF1.tdp').values

        staDb_nd    = _np.ndarray((tropNom_out.shape),dtype=object)
        rate_nd     = _np.ndarray((tropNom_out.shape),dtype=object)
        VMF1_dir_nd = _np.ndarray((tropNom_out.shape),dtype=object)
        stns_nd     = _np.ndarray((tropNom_out.shape),dtype=object)

        staDb_nd.fill(staDb); rate_nd.fill(rate); VMF1_dir_nd.fill(VMF1_dir); stns_nd.fill(stns)

        tropnom_param = _np.column_stack((begin,end,tropNom_out,staDb_nd,rate_nd,VMF1_dir_nd,stns_nd))

        num_cores = num_cores if len(tropnom_param) > num_cores else len(tropnom_param)
        step_size = int(_np.ceil(len(tropnom_param) / num_cores))

        print(drinfo_years_list[i],'year tropnominals generation...',end=' ')
        print ('Number of files to process:', len(tropnom_param),'| Adj. num_cores:', num_cores)
        
        # tqdm is not used here, as the per-chunk pools would produce lots of progress bars
        for chunk in range(step_size):
            try:
                pool = _Pool(num_cores)
                pool.map(_gen_VMF1_tropNom, tropnom_param[_np.arange(chunk, len(tropnom_param), step_size)])
            finally:
                pool.close()
                pool.join()
        print('| Done!')
Example No. 19
def get_drInfo(tmp_dir, num_cores, tqdm, selected_rnx):
    '''Analysis is done over all stations in the project's tmp_dir. The problem with running analysis on all converted files
    is that 30-hour files are also present in the directory, so the original files are difficult to extract.
    The naming convention for 30h files was changed; the merge naming needs to change as well.'''
    tmp_dir = _os.path.abspath(tmp_dir)
    num_cores = int(num_cores)  #safety precaution if str value is specified
    rnx_dir = _os.path.join(tmp_dir, rnx_dr_lbl)
    drinfo_dir = _os.path.join(rnx_dir, drInfo_lbl)
    if not _os.path.exists(drinfo_dir): _os.makedirs(drinfo_dir)

    selected_rnx['good'] = _dr_size(selected_rnx['dr_path']) > 20
    #New approach to file saving is to save SSSSYYYY.zstd files for each year in each station. More modular approach.
    stations = selected_rnx[
        selected_rnx['good']]['station_name'].unique().sort_values()
    print('stations selected: {}'.format(stations.get_values()))
    years = selected_rnx[selected_rnx['good']]['year'].unique()
    years.sort()
    print('years selected   : {}'.format(years))
    for station in stations:
        for year in years:
            filename = '{drinfo_dir}/{yyyy}/{station}{yy}.zstd'.format(
                drinfo_dir=drinfo_dir,
                yyyy=year.astype(str),
                station=station.lower(),
                yy=year.astype(str)[2:])
            if not _os.path.exists(filename):
                dr_station_year = selected_rnx[
                    (selected_rnx['station_name'] == station)
                    & (selected_rnx['year'] == year)]
                dr_good_station_year = dr_station_year['dr_path'][
                    dr_station_year['good']]
                if dr_good_station_year.shape[0] > 0:
                    print(
                        '{} good files found for {}{} out of {}. Running get_drInfo...'
                        .format(dr_good_station_year.shape[0], station, year,
                                dr_station_year.shape[0]))
                    num_cores = num_cores if dr_good_station_year.shape[
                        0] > num_cores else dr_good_station_year.shape[0]
                    with _Pool(processes=num_cores) as p:
                        if tqdm:
                            drinfo_df = _pd.concat(list(
                                _tqdm.tqdm_notebook(
                                    p.imap(_drInfo2df, dr_good_station_year),
                                    total=dr_good_station_year.shape[0],
                                    desc='{}{}'.format(station.lower(),
                                                       year.astype(str)[2:]))),
                                                   axis=0,
                                                   ignore_index=True)
                        else:
                            print('Running get_drInfo for {station}{yy}.zstd'.
                                  format(station=station.lower(),
                                         yy=year.astype(str)[2:]))
                            drinfo_df = _pd.concat(p.map(
                                _drInfo2df, dr_good_station_year),
                                                   axis=0,
                                                   ignore_index=True)
                    drinfo_df['station_name'] = drinfo_df[
                        'station_name'].astype('category')
                    drinfo_df['length'] = (drinfo_df['end'] -
                                           drinfo_df['begin']).astype(
                                               'timedelta64[h]').astype(int)
                    #Saving extracted data for further processing
                    _dump_write(data=drinfo_df,
                                filename=filename,
                                cname='zstd',
                                num_cores=num_cores)
                #gather should be separate, otherwise conflict and corrupted files
                else:
                    print('{} good files found for {}{} out of {}. Skipping.'.
                          format(dr_good_station_year.shape[0], station, year,
                                 dr_station_year.shape[0]))
            else:
                print('{} exists'.format(filename))
Example No. 20
def uncompress_mp(filelist, num_cores=10):
    with _Pool(processes=num_cores) as p:
        p.map(uncompress, filelist)
Example No. 21
def sghmc_chains(grad_log_den_data,
                 grad_log_den_prior,
                 data,
                 V_hat,
                 eps,
                 theta_0,
                 C,
                 heatup,
                 epoches,
                 batch_size,
                 chain=1,
                 Minv=None):
    '''
    Implementation of Stochastic Gradient Hamiltonian Monte Carlo.
    (See details in Chen et al., 2014)
    
    This is a multiprocess version of sghmc (only works on linux).
    It will run multiple (number = chain) simulations simultaneously and return a list of simulations.
    
    Dimensions in the sampling procedure:
        p: dimension of parameters(theta)
        n: number of observed data.
        m: dimension of data.
    
    INPUT:            
        grad_log_den_data: function with parameters (data,theta)
            to compute $\\nabla log(p(data|theta))$ (gradient with respect to theta) of a set of data.
            
        grad_log_den_prior: function with parameter (theta)
            to compute $\\nabla log(p(theta))$.
            
        data: np.array with shape (n,m)
            representing observed data 
            
        V_hat: np.array with shape (p,p)
            a matrix of estimated Fisher Information 
            
        eps: float or double
            learning rate
            
        theta_0: np.array with shape (p,)
            initial point of sampling.
            
        C: np.array with shape (p,p)
            a matrix representing friction, see paper for details. 
            C-0.5*eps*V_hat must be positive definite.
            
        heatup: int
            number of iterations to discard before storing sampling points.
            
        epoches: int
            iterations to run. Must be greater than heatup.
        
        batch_size: int
            size of a minibatch in an iteration, hundreds recommended
            
        chain: int
            number of chains to run. Each chain is a simulation.
            
        Minv: np.array with shape (p,p)
            if default (None), the identity matrix will be used. (See paper for details)
            
    OUT:
        sample: a list (number = chain) of np.array with shape (epoches - heatup, p)
            sampled posterior thetas.
    '''

    n, m = data.shape
    p = theta_0.shape[0]

    if V_hat.shape != (p, p):
        _sys.exit('V_hat dimensions do not match with theta')

    if Minv is not None:
        if Minv.shape != (p, p):
            _sys.exit('Minv dimensions do not match with theta')

    if C.shape != (p, p):
        _sys.exit('C dimensions do not match with theta')

    if n % batch_size != 0:
        _sys.exit('number of data points should be divisible by batch_size')

    sqrt_noise = _la.sqrtm(2 * (C - 0.5 * eps * V_hat) * eps)
    batches = int(_np.ceil(n / batch_size))

    if (Minv is None):
        sqrtM = None
        prer = eps
        fric = eps * C
    else:
        sqrtM = _la.sqrtm(_la.inv(Minv))
        prer = eps * Minv
        fric = eps * C @ Minv

    sp = _pt(_single_chain,
             theta_0=theta_0,
             epoches=epoches,
             heatup=heatup,
             p=p,
             n=n,
             Minv=Minv,
             sqrtM=sqrtM,
             data=data,
             batches=batches,
             prer=prer,
             gradU=_gradU,
             grad_log_den_data=grad_log_den_data,
             grad_log_den_prior=grad_log_den_prior,
             eps=eps,
             fric=fric,
             sqrt_noise=sqrt_noise)

    with _Pool(processes=chain) as pool:
        seedss = list(_np.random.randint(0, 10000, chain))
        res = pool.map(sp, seedss)

    return (res)
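
_single_chain is not shown in this excerpt. As a self-contained illustration of the update it presumably implements (the SGHMC discretization from Chen et al., 2014, here with M = I), a toy single-chain sampler for the mean of a 1-D Gaussian could look like the sketch below; this is a hedged illustration, not the code behind sghmc_chains.

# Hypothetical, self-contained sketch of one SGHMC chain (Chen et al., 2014) with M = I,
# sampling the mean of a 1-D Gaussian with a N(0, 10^2) prior.
import numpy as np

def sghmc_1d(data, eps=1e-3, C=1.0, V_hat=0.0, heatup=500, epoches=2000,
             batch_size=50, seed=0):
    rng = np.random.default_rng(seed)
    n = data.shape[0]
    theta, r = 0.0, 0.0
    # Noise scale 2*(C - 0.5*eps*V_hat)*eps, the scalar analogue of sqrt_noise above.
    noise_sd = np.sqrt(2.0 * (C - 0.5 * eps * V_hat) * eps)
    samples = []
    for epoch in range(epoches):
        rng.shuffle(data)
        for batch in data.reshape(n // batch_size, batch_size):
            theta += eps * r  # position update
            # Minibatch estimate of grad U = -grad log posterior,
            # with the likelihood term rescaled by n / batch_size.
            grad_U = -((n / batch_size) * np.sum(batch - theta) - theta / 100.0)
            r += -eps * grad_U - eps * C * r + noise_sd * rng.standard_normal()
        if epoch >= heatup:
            samples.append(theta)
    return np.array(samples)

if __name__ == '__main__':
    data = np.random.default_rng(1).normal(loc=2.0, scale=1.0, size=1000)
    draws = sghmc_1d(data)
    print('posterior mean estimate:', draws.mean())  # should land close to 2.0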
Example No. 22
    _logpath = _EXPDIR + "/" + _LOG_FILE


if True:                        # Worker
    _t_total= time.time()
    def proc(run):     # Called by each worker  
        _r_time = time.time()
        if conf.delay:
            if run[1] <= conf.workers:
                time.sleep(conf.delay*(run[1]-1))
        os.system('python3 ' + conf.file + ' ' + run[0] + (' > /dev/null' if not conf.verbose else '') )
        _elapsed = time.time() - _r_time
        s = str(dt.datetime.now(tz=pytz.timezone(_TIMEZONE)))[:19] + ' Completed: ' + str(run[1]) + " / " + str(n) + ' in ' + human_time(_elapsed) + ' ETA: ' + human_time(_elapsed * ((n - run[1])  / conf.workers))
        if conf.save:
            with open(_logpath,"a") as logfile:
                logfile.write(s + '\n')
        print(s)


if __name__ == '__main__':      # Main program
    pool = _Pool(processes=conf.workers)
    pool.imap(proc, runs)  # tasks are dispatched even though the result iterator is never consumed
    pool.close()
    pool.join()            # wait for all runs to finish before reporting the total time
    s = str(dt.datetime.now(tz=pytz.timezone(_TIMEZONE)))[:19] + ' All finished! This took: ' + human_time(time.time() - _t_total)
    if conf.save:
        with open(_logpath,"a") as logfile:
            logfile.write(s + '\n')
    print(s)