Example #1
    def build_clusters(self):
        # build every (k, n) combination: one task per cluster count per iteration
        set_list = [(k, n) for k in self.k_list for n in range(self.num_iter)]

        # fan the tasks out over a worker pool
        p = Pool()
        p.starmap(self.prepare_directory, tqdm(set_list))
        p.close()
        p.join()

        self.write_dirlist()
        self.print_face()
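The snippet above is a method on a clustering class, so it is not runnable on its own. A minimal self-contained sketch of the same pattern, with a hypothetical module-level prepare_directory standing in for the bound method:

from multiprocessing import Pool
from tqdm import tqdm

def prepare_directory(k, n):
    # hypothetical stand-in for the class method above
    return 'clusters_k%d_run%d' % (k, n)

if __name__ == '__main__':
    # cross product of cluster counts and iteration indices, one tuple per task
    set_list = [(k, n) for k in (2, 4, 8) for n in range(3)]
    with Pool() as p:
        # note: starmap materializes the iterable up front, so the tqdm bar
        # tracks task submission, not task completion
        dirs = p.starmap(prepare_directory, tqdm(set_list))
    print(dirs)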
Example #2
from multiprocessing import Pool, cpu_count


class Parallel(Executor):
    def __init__(self, pool_size=cpu_count() - 1):
        # keep one core free for the parent process by default
        self.workers = pool_size
        self.pool = Pool(processes=pool_size)

    def run(self, func, arglist):
        # each element of arglist is unpacked as the positional arguments of func
        return self.pool.starmap(func, arglist)

    def num_workers(self):
        return self.workers
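A possible usage sketch for this executor (square is a made-up two-argument worker; Executor is assumed to come from the snippet's own module):

def square(base, exp):
    # made-up worker function: each (base, exp) tuple is unpacked into these arguments
    return base ** exp

if __name__ == '__main__':
    executor = Parallel(pool_size=4)
    print(executor.run(square, [(2, 2), (3, 2), (4, 2)]))  # [4, 9, 16]
    print(executor.num_workers())  # 4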
Example #3
def all_countries(base_path,
                  multiprocess=True,
                  overwrite=True,
                  savefig=False,
                  report=False):
    """
    Main function to estimate the length of all the roads and countries we are interested in. 

    Args:
        *base_path* : Base path to the location of all files and directories in this project.
        
        *multiprocess* : Set to True by default. Set to False in the case of limited processing power.
        
        *overwrite* : Set to True by default. This relates to all input data (i.e. .poly files, .osm.pbf files and shapefiles).

        *savefig* : Set to False by default. When set to True, it will return a figure with the roads of a country.

    Returns:
        An Excel file with the length of all **Primary**, **Secondary**, **Tertiary**, **Track** and **Other** roads for each country.
    
    """

    print('The calculation of road lengths has started!')
    start = time.time()

    # =============================================================================
    #     """ Set path to dirs"""
    # =============================================================================
    dir_out = os.path.join(base_path, 'output_data')
    poly_dir = os.path.join(base_path, 'poly_files')
    osm_path_in = os.path.join(base_path, 'osm_continent')
    fig_dir = os.path.join(base_path, 'Figures')

    # =============================================================================
    #     """ create directories if they are not created yet """
    # =============================================================================
    if not os.path.exists(dir_out):
        os.makedirs(dir_out)

    if not os.path.exists(poly_dir):
        os.makedirs(poly_dir)

    if not os.path.exists(osm_path_in):
        os.makedirs(osm_path_in)

    if savefig and not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    # =============================================================================
    #     """ Set path to files we use """
    # =============================================================================
    wb_country_in = os.path.join(base_path, 'input_data', 'wbccodes2014.csv')
    global_shape = os.path.join(base_path, 'input_data',
                                '2015_GAUL_Dataset_Mod.gdb')

    # =============================================================================
    #     """Load country shapes and list and only save the required countries"""
    # =============================================================================
    wb_country = pd.read_csv(wb_country_in, header=0, index_col=0)

    # filter out high-income countries from the country file
    country_list = wb_country[['country', 'continent']].loc[wb_country['wbregion'] != 'YHI']

    # add column to country list so we can easily look up the required continental
    # osm file for that continent
    map_continent = {
        'MA': 'central-america',
        'SA': 'south-america',
        'EU': 'europe',
        'AS': 'asia',
        'AU': 'australia-oceania',
        'AF': 'africa',
        'AM': 'north-america'
    }

    country_list['osm-cont'] = country_list['continent'].map(
        lambda x: map_continent[x])

    # =============================================================================
    #     """ create .poly files to clip countries from osm.pbf files """
    # =============================================================================
    if not os.listdir(poly_dir):
        create_poly_files(base_path, global_shape, save_shapefile=overwrite)
    # =============================================================================
    #     """ check if we have actually downloaded the OpenStreetMap input files.
    #     If not, let's download them. Note: this will take a while! """
    # =============================================================================
    continent_list = [
        'central-america', 'south-america', 'europe', 'asia',
        'australia-oceania', 'africa', 'north-america'
    ]

    for continent in continent_list:
        filename = '%s-latest.osm.pbf' % continent
        url = 'http://download.geofabrik.de/%s' % filename
        if filename not in os.listdir(osm_path_in):
            # urlretrieve needs a destination file path, not a directory
            urllib.request.urlretrieve(url, os.path.join(osm_path_in, filename))

    # =============================================================================
    #     """ create extracted osm files for each country per continent """
    # =============================================================================
    out = []
    countries = []
    continent_osms = []
    base_paths = []
    overwrites = []
    savefigs = []
    reporting = []
    for country in country_list.iterrows():
        country = country[1]
        continent_osm = os.path.join(
            osm_path_in, '%s-latest.osm.pbf' % (country['osm-cont']))
        countries.append(country['country'])
        continent_osms.append(continent_osm)
        base_paths.append(base_path)
        overwrites.append(overwrite)
        savefigs.append(savefig)
        reporting.append(report)

    # multiprocessing starts if set to True; set it to False on machines with limited capacity
    if multiprocess:
        pool = Pool(cpu_count() - 1)
        out = pool.starmap(
            single_country,
            zip(countries, continent_osms, base_paths, overwrites, savefigs,
                reporting))

    # when multiprocessing set to False, we will just loop over the countries.
    else:
        out = []
        i = 0
        for country in country_list.iterrows():
            country = country[1]
            continent_osm = os.path.join(
                osm_path_in, '%s-latest.osm.pbf' % (country['osm-cont']))
            out.append(
                single_country(country['country'], continent_osm, base_path,
                               overwrites[i], savefigs[i], reporting[i]))
            i += 1

    df = pd.concat(out, axis=1).T

    map_country = dict(zip(wb_country['country'], wb_country['country_name']))
    df['Country'] = df.index.to_series().map(map_country)

    df.set_index('Country', inplace=True, drop=True)

    writer = pd.ExcelWriter(os.path.join(dir_out, 'dist_roads.xlsx'))
    df.to_excel(writer, 'output')
    writer.save()

    end = time.time()

    print('It took ' + str(np.float16((end - start))) + " seconds to finish!")
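Distilled to its core, the dispatch in this function builds one list per parameter and zips them into argument tuples for starmap. A reduced sketch with illustrative names (the stub below is not the real single_country):

from multiprocessing import Pool, cpu_count

def single_country(country, continent_osm, base_path, overwrite, savefig, report):
    # hypothetical stand-in returning a one-line summary
    return '%s: processed from %s' % (country, continent_osm)

if __name__ == '__main__':
    countries = ['Kenya', 'Peru']
    continent_osms = ['africa-latest.osm.pbf', 'south-america-latest.osm.pbf']
    n = len(countries)
    # zip the per-country lists into one argument tuple per task
    args = zip(countries, continent_osms, ['.'] * n, [True] * n, [False] * n, [False] * n)
    with Pool(cpu_count() - 1) as pool:
        out = pool.starmap(single_country, args)
    print(out)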
Example #4
from multiprocess import Pool  # dill-based fork of multiprocessing; can pickle lambdas
from time import sleep
import time


def f(x):
    sleep(1)
    return x + 1


def g(s, x):
    sleep(s)
    return x + 1


# [f(i) for i in range(5)]  # slow: runs serially, about 5 seconds

pool = Pool(4)
starttime = time.time()
# res = pool.map(f, range(12))
# res2 = pool.map(lambda a: g(1, a), range(12))  # works because multiprocess serializes lambdas via dill
res3 = pool.starmap(g, [[1, 1], [1, 2], [1, 3], [1, 4]])  # four 1-second tasks on 4 workers: ~1 second total
print(time.time() - starttime)
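The commented-out res2 line relies on multiprocess serializing the lambda with dill; the standard library's multiprocessing cannot pickle lambdas. A portable alternative that fixes the first argument is functools.partial:

from functools import partial
from multiprocessing import Pool
from time import sleep

def g(s, x):
    sleep(s)
    return x + 1

if __name__ == '__main__':
    with Pool(4) as pool:
        # partial(g, 1) fixes s=1, so a plain map over x replaces starmap
        res = pool.map(partial(g, 1), range(12))
    print(res)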
Example #5
    # generating data for neural net with model as input and grid as output
    input1 = model_input
    start = time.time()

    # going parallel
    cpu_cores = cpu_count()
    parallel_set = np.array_split(model_input, cpu_cores, axis=0)
    parallel_list = []

    # generating list of datasets for parallel
    for i in range(cpu_cores):
        parallel_list.append((parallel_set[i], someOptionList))

    # run priceGenerator over the chunks in parallel, then stitch the results back together
    pool = Pool(cpu_cores)
    res = pool.starmap(priceGenerator, parallel_list)
    output1 = np.concatenate(res, axis=0)
    stop = time.time()
    print("time: ", stop - start)

    # saving dataset1
    np.savetxt("Data/hestonPriceGridInput.csv", input1, delimiter=",")
    np.savetxt("Data/hestonPriceGridOutput.csv", output1, delimiter=",")

    # generating data for a neural net with all inputs and one output price
    total_comb = np.shape(model_input)[0] * np.shape(output1)[1]
    total_cols = np.shape(model_input)[1] + np.shape(option_input)[1]
    total_options = np.shape(option_input)[0]
    input2 = np.empty((total_comb, total_cols))
    output2 = np.empty((total_comb, 1))
    for i in range(np.shape(model_input)[0]):
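The scatter-gather idiom above, reduced to a standalone sketch; doubler is a made-up stand-in for priceGenerator, and the second tuple element plays the role of someOptionList:

import numpy as np
from multiprocessing import Pool, cpu_count

def doubler(chunk, factor):
    # made-up stand-in for priceGenerator(chunk, someOptionList)
    return chunk * factor

if __name__ == '__main__':
    data = np.arange(20).reshape(10, 2)
    chunks = np.array_split(data, cpu_count(), axis=0)  # one chunk of rows per core
    with Pool(cpu_count()) as pool:
        parts = pool.starmap(doubler, [(c, 2) for c in chunks])
    result = np.concatenate(parts, axis=0)  # rows come back in submission order
    print(result.shape)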
Example #6
    # load dataframe with regions
    regions_shape = os.path.join(base_path,'regions','Tanzania Regions.shp')
    tza_regions = gpd.read_file(regions_shape)
   
    # drop empty entries from the region column
    regions = list(filter(None.__ne__, tza_regions['REGION']))

    # hard-coded list of regions (overrides the shapefile-derived list above)
    regions = ['Arusha', 'Dar-Es-Salaam', 'Dodoma', 'Iringa', 'Kagera', 'Kigoma', 'Kilimanjaro', 'Manyara',
               'Tabora', 'Mbeya', 'Morogoro', 'Mtwara', 'Mwanza', 'Pwani', 'Ruvuma', 'Singida', 'Rukwa',
               'Lindi', 'Tanga', 'Shinyanga']

    base_paths = [base_path]*len(regions)
    region_shapes = [regions_shape]*len(regions)
    
    pool = Pool(cpu_count() - 1)
    pool.starmap(dist_junction, zip(regions, region_shapes, base_paths))

# =============================================================================
#     # merge output of distance to junction    
# =============================================================================
    shp_network = os.path.join(base_path,'output_closest_jct','dist_to_jct_tza.shp')
    
    df_list_regions = []
    for region in regions:
        if len([i for i in os.listdir(os.path.join(base_path, 'output_closest_jct')) if i.endswith('%s.shp' % region)]) != 0:
            try:
                country_path = os.path.join(base_path, 'output_closest_jct', '%s.shp' % region)
                df_list_regions.append(gpd.read_file(country_path))
            except Exception:
                print("%s doesn't seem to have been finished" % region)
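The snippet ends before the collected per-region files are merged. A plausible completion sketch, assuming the per-region shapefiles share a schema and CRS and that pandas is imported as pd, would concatenate them into the shp_network path defined above:

# plausible completion sketch (not in the original snippet)
df_network = gpd.GeoDataFrame(pd.concat(df_list_regions, ignore_index=True))
df_network.crs = df_list_regions[0].crs  # assumes all regions share one CRS
df_network.to_file(shp_network)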
Example #7
                 train_input, train_output, test_input, test_output, 1000, 100,
                 0.1, "sabr5.h5", 1000, 0.9, 0.1, 0, False, True, 15
             ],
             [
                 train_input, train_output, test_input, test_output, 1000, 100,
                 0.1, "sabr6.h5", 1000, 0.9, 0.1, 0.2, False, True, 15
             ],
             [
                 train_input, train_output, test_input, test_output, 1000, 100,
                 0.1, "sabr7.h5", 1000, 0.9, 0.1, 0, False, False, 15
             ],
             [
                 train_input, train_output, test_input, test_output, 1000, 100,
                 0.1, "sabr8.h5", 1000, 0.9, 0.1, 0.2, False, False, 15
             ]]
res = pool.starmap(ms.model_train, input_set)
print(res)
"""
# Number of nodes
nodes = 1000

# Model creation
model = Sequential()

# Layer 1
model.add(Dense(nodes, input_shape=(14,)))
model.add(Activation('softplus'))  # softplus, f(x) = ln(1 + e^x), a smooth approximation of ReLU
model.add(Dropout(0.2))

# Layer 2, output
model.add(Dense(10))
"""
def run():
    parser = create_parser()
    args = parser.parse_args()
    if args.num_threads is not None:
        threads = int(args.num_threads)
    else:
        threads = 1

    print("Parsing BLAST output.\n")

    num_blast = count_line(args.input_blast_file)
    batch_blast = split_file(args.input_blast_file, num_blast, threads)
    print("\nSplit BLAST input into " + str(len(batch_blast)) +
          " subunit(s).\n")
    #print(batch_blast)

    num_seq = count_seq(args.input_fasta_file)
    batch_seq = split_file(args.input_fasta_file, num_seq, threads)
    print("\nSplit sequence input into " + str(len(batch_seq)) +
          " subunit(s).\n")
    #print(batch_seq)

    ### Parse BLAST output file

    if args.keep_term is not None:
        print("\nKeeping sequences belonging to sskingdom: " +
              str(args.keep_term) + "\n")
        keepage = []
        p = Pool(threads)
        with open(args.input_blast_file + "_tmp", "w") as out:
            # dispatch one parse_blast task per batch so the subunits actually run in parallel
            for i, j in p.starmap(parse_blast,
                                  [(batch, args.keep_term)
                                   for batch in batch_blast]):
                for line in j:
                    out.write(line)
                keepage.extend(i)  # merge the per-batch keep lists
        blast = args.input_blast_file + "_tmp"
    else:
        p = Pool(threads)
        print("\nNo parsing of BLAST output.\n")
        keepage = []
        blast = args.input_blast_file

    ### Using the BLAST output file (parsed or otherwise), figure out which baits need to be removed

    print("\nDetermining baits to remove.\n")

    removal = []
    for i in p.starmap(to_remove, [(batch, blast) for batch in batch_seq]):
        removal.extend(i)  # merge the per-batch removal lists

    #print("Removed: " + str(removal))
    #print("Kept: " + str(keepage))


    ### Off-target filter using the above list of baits to remove (or ignore when checking each bait)

    print("\nFiltering baits.\n")
    count_off_target = 0
    with open(args.output_fasta_file, "w") as out2:
        for i in p.starmap(off_target, [(batch, removal, blast) for batch in batch_seq]):
            for t in i:
                out2.write(">" + str(t[0]) + "\n" + str(t[1]) + "\n")
                count_off_target = count_off_target + 1

    p.close()

    count_off_target = num_seq - count_off_target

    return count_off_target
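The split-then-starmap-then-merge pattern used throughout run(), reduced to a runnable sketch; count_hits is a made-up stand-in for parse_blast, to_remove and off_target, and the hard-coded batches stand in for the output of split_file:

from multiprocessing import Pool

def count_hits(batch, term):
    # made-up per-batch worker: count occurrences of term in one batch of lines
    return sum(line.count(term) for line in batch)

if __name__ == '__main__':
    batches = [['abcab', 'bb'], ['cab', 'aa']]  # stand-in for split_file output
    with Pool(2) as p:
        per_batch = p.starmap(count_hits, [(b, 'a') for b in batches])
    print(sum(per_batch))  # merge the per-batch results: 5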