Пример #1
0
    def test_map_item_limit(self):
        """Check that ``map`` respects the scheduler's ``map_item_limit``.

        A ten-item map under the configured limit must run to completion,
        while a map with one item more than the limit must raise
        ``ValueError``.
        """
        TOO_BIG_COUNT = 100

        # Build an executor whose scheduler config carries the item limit.
        config = pywren.wrenconfig.default()
        config.setdefault('scheduler', {})['map_item_limit'] = TOO_BIG_COUNT
        executor = pywren.default_executor(config=config)

        def plus_one(x):
            return x + 1

        # Well under the limit: expected to succeed.
        small_input = np.arange(10)
        pywren.get_all_results(executor.map(plus_one, small_input))

        # One item over the limit: expected to be rejected.
        with pytest.raises(ValueError):
            oversized_input = np.arange(TOO_BIG_COUNT + 1)
            executor.map(plus_one, oversized_input)
def create_wordcloud_pywren(bucket_name):
    """Map ``wordcloud`` over the pickled link list using PyWren.

    Loads the links from ``links.pickle`` (relative to the current working
    directory), partitions them with ``split_list(..., wanted_parts=100)``
    and maps ``wordcloud`` over the parts. The bucket name is handed to the
    invocations through the ``S3BUCKET`` entry of ``extra_env``. The results
    are collected with ``pywren.get_all_results`` and discarded.

    Args:
        bucket_name: Bucket name exported as ``S3BUCKET``.
    """
    # Use a context manager so the pickle file is closed deterministically
    # instead of being left open until garbage collection.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only use a links.pickle that comes from a trusted source.
    with open('links.pickle', 'rb') as links_file:
        links = pickle.load(links_file)

    wrenexec = pywren.default_executor()
    futures = wrenexec.map(wordcloud,
                           split_list(links, wanted_parts=100),
                           invoke_pool_threads=128,
                           extra_env={'S3BUCKET': bucket_name})
    pywren.get_all_results(futures)
Пример #3
0
def get_s3_tar_structure(prefix, bucket='imagenet2datav2', verbose=True):
    """Map ``list_files_in_s3_tarball`` over every key found under ``prefix``.

    Args:
        prefix: Key prefix passed to ``list_all_keys``.
        bucket: Bucket passed to ``list_all_keys``.
        verbose: When true, print the key count, up to ten sample keys and
            the elapsed time of the PyWren run.

    Returns:
        A dict mapping each key to the result obtained for it.
    """
    import pywren

    all_keys = list_all_keys(prefix, bucket=bucket)

    if verbose:
        print('Found {} keys:'.format(len(all_keys)))
        for sample_key in all_keys[:10]:
            print('  ' + sample_key)
        print('  ...')
        print('Starting PyWren ...')

    started = timer()
    executor = pywren.default_executor()
    per_key_results = pywren.get_all_results(
        executor.map(list_files_in_s3_tarball, all_keys))
    finished = timer()

    if verbose:
        print('Done, took {} seconds'.format(finished - started))

    # One result per key is required for the pairing below to be complete.
    assert len(per_key_results) == len(all_keys)

    return dict(zip(all_keys, per_key_results))
Пример #4
0
def main():
    """Flatten the ImageNet validation tarballs with PyWren and report failures.

    Maps ``flatten_val_tarball`` over the validation tarball names, waits on
    each future in turn, prints and collects the wnids whose job raised, and
    finally calls ``pywren.get_all_results`` on the full set of futures.
    """
    imgnt = imagenet.ImageNetData()
    wnids = list(imgnt.train_imgs_by_wnid.keys())

    # NOTE(review): the train names and flatten_train_tarball are not used by
    # the current run — presumably kept to switch splits by hand; confirm.
    train_tarball_names = get_tarball_names(wnids, 'imagenet-train/')
    val_tarball_names = get_tarball_names(wnids, 'imagenet-validation/val-')

    def flatten_train_tarball(tarball_name):
        return flatten_tarball(tarball_name,
                               prefix="imagenet-train-individual/")

    def flatten_val_tarball(tarball_name):
        return flatten_tarball(tarball_name,
                               prefix="imagenet-validation-individual/")

    pwex = pywren.default_executor()
    futures = pwex.map(flatten_val_tarball, val_tarball_names)

    failed_wnids = []
    # assumes get_tarball_names yields one name per wnid, in wnid order, so
    # that futures and wnids line up — TODO confirm
    for future, wnid in zip(futures, wnids):
        try:
            future.result()
        except Exception:
            # Previously a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit and made the run hard to abort.
            failed_wnids.append(wnid)
            print('wnid failed', wnid)
    print(failed_wnids)
    pywren.get_all_results(futures)
def make_polygons_parallel():
    '''
    Map ``make_polygons`` over the output of ``get_geojsons`` with PyWren
    and return the results collected by ``pywren.get_all_results``.
    '''
    geojsons = get_geojsons()
    # NOTE(review): ``pwex`` is not created here; it has to exist in an
    # enclosing scope by the time this function is called.
    pending = pwex.map(make_polygons, geojsons)
    return pywren.get_all_results(pending)
def get_geojsons_paralell():
    '''
    Map ``get_geojsons`` over every entry of ``GEOJSONS`` with a fresh
    PyWren executor and return the collected results.
    '''
    executor = pywren.default_executor()
    return pywren.get_all_results(executor.map(get_geojsons, GEOJSONS))
Пример #7
0
    def test_simple_map(self):
        """Map ``plus_one`` over four integers and compare with ``x + 1``."""
        def plus_one(x):
            return x + 1

        inputs = np.arange(4)
        pending = self.wrenexec.map(plus_one, inputs)

        collected = pywren.get_all_results(pending)
        np.testing.assert_array_equal(collected, inputs + 1)
Пример #8
0
    def test_get_all_results(self):
        """Check ``get_all_results`` yields each mapped value in input order."""
        def plus_one(x):
            return x + 1

        inputs = np.arange(10)
        pending = self.wrenexec.map(plus_one, inputs)

        collected = np.array(pywren.get_all_results(pending))
        np.testing.assert_array_equal(collected, inputs + 1)
Пример #9
0
def brute_force_split(pin, n):
    """Run ``brute_force_chars`` over ``pin`` in chunks of ``n`` items.

    Args:
        pin: Sliceable sequence to be split into chunks.
        n: Chunk size; expected to be a positive integer.

    Returns:
        The results collected by ``pywren.get_all_results`` for the mapped
        chunks.
    """
    # Step through ``pin`` in strides of ``n`` so that a trailing chunk
    # shorter than ``n`` is kept. Counting only ``len(pin) // n`` full chunks
    # silently dropped the remainder whenever ``len(pin)`` was not a
    # multiple of ``n``.
    chunks = [pin[start:start + n] for start in range(0, len(pin), n)]

    print('Executing a lambda for each item in - ' + str(chunks))

    pwex = pywren.default_executor()
    # Executing async in parallel
    futures = pwex.map(brute_force_chars, chunks)
    return pywren.get_all_results(futures)
Пример #10
0
def main(thread, log=False):
    """Run PyWren batches back to back until ``total_time`` seconds elapse.

    Starts a first batch from ``get_minibatches(batch_file_size)``, starts
    ``thread``, then loops: wait for the outstanding futures and, once they
    are collected, call ``get_minibatches`` with the number of futures just
    collected and start the next batch.

    Args:
        thread: Object with a ``start()`` method; started once before the
            loop begins.
        log: Not used by this function.
    """
    global outf
    global total_time

    print("Starting Training" + '-' * 30)
    start_time = time.time()

    # start jobs
    minibatches = get_minibatches(batch_file_size)
    futures = start_batch(minibatches)

    thread.start()
    print("Main thread start")
    while time.time() - start_time < total_time:
        print("hit", time.time() - start_time)

        try:
            pywren.get_all_results(futures)
        except Exception:
            # Best effort: try again on the next pass. Narrowed from a bare
            # ``except:`` so KeyboardInterrupt/SystemExit can still stop
            # the loop.
            continue

        finished = len(futures)
        if finished > 0:
            print("Processed: %d" % finished)
            minibatches = get_minibatches(finished)
            futures = start_batch(minibatches)
    print("Main thread has stopped")
Пример #11
0
import pywren
import time
import numpy as np

from pywrenext.progwait import progwait

# Executor used by the map call further down in this script.
wrenexec = pywren.default_executor()


def sleep(x):
    """Block the caller for ``x`` seconds, then hand ``x`` back."""
    duration = x
    time.sleep(duration)
    return duration


# Launch ten sleep tasks with arguments 10..19 (seconds to sleep).
futures = wrenexec.map(sleep, np.arange(10) + 10)
# Wait through progwait; only the first element of its return value is used.
futures_done, _ = progwait(futures)
print(pywren.get_all_results(futures_done))
Пример #12
0
import pywren
import numpy as np
import math
import boto3
import pickle


def my_function(x):
    """Return ``x`` increased by seven."""
    offset = 7
    return x + offset


# Map my_function over 0..9 through PyWren; the collected results are
# not stored or printed.
wrenexec = pywren.default_executor()
futures = wrenexec.map(my_function, range(10))
pywren.get_all_results(futures)
Пример #13
0

# Script entry point: scrape the catalogue pages, store the books found, then
# issue per-book scrape jobs.
# NOTE(review): html_soup, base_url, pwex, db, scrape_books and scrape_book
# are not defined in this block — presumably set up earlier in the file.
if __name__ == '__main__':
    t0 = time.time()

    # Get number of pages to iterate over
    # (the last whitespace-separated token of the first 'li.current' element)
    pages = html_soup.select('li.current')
    pages = int(pages[0].getText().split()[-1])

    # Get a list of URLs to map over
    catalogue_urls = [
        base_url + f'catalogue/page-{i}.html' for i in range(1, pages + 1)
    ]

    # Scrape every result page using Lambda
    book_list = pywren.get_all_results(pwex.map(scrape_books, catalogue_urls))
    book_list = list(itertools.chain(*book_list))  # flatten list
    db['books'].insert_many(book_list)

    t1 = time.time()
    print(f'Finished finding books in {t1 - t0} seconds')

    # Scrape each book
    # batch_size is taken from the first command-line argument.
    batch_size = int(sys.argv[1])
    lambda_tasks, result = [], []
    # Split book_list into batch_size interleaved chunks (every
    # batch_size-th book starting at offset i) and issue one map call
    # per chunk.
    for i in range(0, batch_size):
        chunk = book_list[i::batch_size]
        book_ids = [b['book_id'] for b in chunk]
        lambda_tasks.append(pwex.map(scrape_book, book_ids))

    # Wait for all scrapes to complete
Пример #14
0
def mmap(pwex, func, data):
  """Map ``func`` over ``data`` on ``pwex`` and return the collected results."""
  return pywren.get_all_results(pwex.map(func, data))
Пример #15
0
    index += fin
    fs.extend(start_batch(minibatches))
    fin = 0

    iter = 0
    while iter < 100:
        # Store model
        
        
        fin = 0
        res = []
        ded = []

        
        t = time.time()
        print(pywren.get_all_results(fs))
        print(time.time() - t)
        exit()



        exit()
        print("Start pool")
        t = time.time()
        pool = ThreadPool(6)
        resa = []
        resa = pool.map(m, fs)
        print("End pool: %f" % (time.time() - t))
        
        res = []
        for a in resa:
Пример #16
0
import pywren
import numpy as np
import sys


def my_function(x):
    """Return ``x`` increased by seven."""
    offset = 7
    return x + offset


# Call the function directly in this process first.
print(my_function(3))


def version(x):
    """Return the running interpreter's ``(major, minor)`` version tuple.

    ``x`` is accepted but not used.
    """
    major, minor = sys.version_info[:2]
    return (major, minor)


# Print the interpreter version seen by this process.
print(version(None))

# Run version once through PyWren and print what that call reports.
wrenexec = pywren.default_executor()
future = wrenexec.call_async(version, 3)
print(future.result())

# Map my_function over 0..9 through PyWren and print the collected results.
futures = wrenexec.map(my_function, range(10))
print(pywren.get_all_results(futures))