Exemplo n.º 1
0
def submit_task_withdep(driver_handle, task_object_dependencies=None):
    """Submit one task that depends on the given objects.

    Args:
        driver_handle: Object whose ``node_manager_client.submit`` is used
            to submit the task.
        task_object_dependencies: List passed to the task as its
            dependencies. ``None`` (the default) means no dependencies.

    Returns:
        The value of ``task.returns()`` for the submitted task.
    """
    # Build a fresh list per call; a literal ``[]`` default would be one
    # shared object reused (and possibly mutated) across calls.
    if task_object_dependencies is None:
        task_object_dependencies = []
    task = ray.local_scheduler.Task(
        ray.local_scheduler.ObjectID(random_string()),
        ray.local_scheduler.ObjectID(random_string()),
        task_object_dependencies,
        1,  # num_returns
        ray.local_scheduler.ObjectID(random_string()),
        0)
    # Values are passed through %s placeholders: extra positional arguments
    # without a placeholder make the logging module fail to format the
    # record instead of printing the value.
    logger.debug("[DRIVER]: submitting task %s", task.task_id())
    driver_handle.node_manager_client.submit(task)
    logger.debug("[DRIVER]: task return values %s", task.returns())
    return task.returns()
Exemplo n.º 2
0
def call_scraper(params, ms_id):
    """ Dispatch an asynchronous crawl of one store

        Params:
        -------
        params: dict
            Crawl parameters; 'name' and 'external_id' are read for logging
        ms_id: uuid
            Master scraper ID
    """
    store_name = params.get('name')
    store_external_id = params.get('external_id')
    logger.debug(
        "Calling to scrape store: {} - {}".format(store_name,
                                                  store_external_id))
    # The whole params dict travels with the master scraper ID.
    crawl_args = (ms_id, params)
    crawl_store.apply_async(args=crawl_args, queue=CELERY_QUEUE)
Exemplo n.º 3
0
def submit_tasks_nodep(driver_handle, num_tasks):
    """Submit ``num_tasks`` tasks that have no dependencies.

    Args:
        driver_handle: Object whose ``node_manager_client.submit`` is used
            to submit each task.
        num_tasks: Number of tasks to create and submit.

    Returns:
        None. The tasks' return values are only logged.
    """
    for _ in range(num_tasks):
        task = ray.local_scheduler.Task(
            ray.local_scheduler.ObjectID(random_string()),
            ray.local_scheduler.ObjectID(random_string()),
            [],  # no dependencies
            1,  # num_returns
            ray.local_scheduler.ObjectID(random_string()),
            0)

        # %s placeholders are required: a bare extra argument makes the
        # logging module fail to format the record.
        logger.debug("[DRIVER]: submitting task %s", task.task_id())
        driver_handle.node_manager_client.submit(task)
        logger.debug("[DRIVER]: task return values %s", task.returns())
Exemplo n.º 4
0
def submit_task_chains(num_chains, tasks_per_chain):
    """Submit chains of dependent tasks and fetch each chain's final result.

    Within a chain, every task is submitted with the return values of the
    previous task as its dependencies; the first task of a chain has none.
    Tasks are submitted through, and results fetched from, the module-level
    ``driver``.

    Args:
        num_chains: Number of independent chains to submit.
        tasks_per_chain: Number of tasks submitted in each chain.

    Returns:
        A dict mapping each chain number to the list holding the first
        return value of every task in that chain, in submission order.
    """
    chain_returns = []
    task_placement_map_ = {}
    for chain_num in range(num_chains):
        last_task_returns = []
        task_placement_map_[chain_num] = []
        for _ in range(tasks_per_chain):
            task_returns = submit_task_withdep(
                driver, task_object_dependencies=last_task_returns)
            last_task_returns = task_returns
            task_placement_map_[chain_num].append(task_returns[0])
        # Only the last task's returns are needed to wait on the chain.
        chain_returns.append(last_task_returns)

    # The value needs a %s placeholder; without one the logging module
    # fails to format the record.
    logger.debug("chain_returns=%s", chain_returns)
    chain_results = driver.get([r[0] for r in chain_returns], timeout_ms=5000)
    print("[DRIVER]: chain return values: ", chain_results)

    return task_placement_map_
Exemplo n.º 5
0
def TEST_run_task_chains(num_chains, tasks_per_chain):
    """Run ``num_chains`` task chains and check every task was accounted for.

    Submits the chains via ``submit_task_chains``, fetches the result of
    every task through the module-level ``driver``, tallies element ``[1]``
    of each result, and asserts that the tally covers
    ``num_chains * tasks_per_chain`` tasks.

    Args:
        num_chains: Number of chains to submit.
        tasks_per_chain: Number of tasks in each chain.

    Raises:
        AssertionError: If the number of tallied tasks differs from
            ``num_chains * tasks_per_chain``.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    task_placement_map = submit_task_chains(num_chains=num_chains,
                                            tasks_per_chain=tasks_per_chain)
    logger.debug("[DRIVER]: task placement information, per chain:")
    task_placement_total = []
    for chain_num in range(len(task_placement_map)):
        task_placement_list = driver.get(task_placement_map[chain_num],
                                         timeout_ms=5000)
        task_placement_total.extend(t[1] for t in task_placement_list)
        # The first argument of logger.debug is the format string, so the
        # values must go through placeholders.
        logger.debug("%s %s", chain_num, task_placement_list)
    logger.debug("task placement overall: %s", task_placement_total)
    # One pass over the results instead of list.count() per distinct value.
    task_placement_stats = list(Counter(task_placement_total).items())
    num_total_tasks = sum(count for _, count in task_placement_stats)
    print("total tasks executed = ", num_total_tasks)
    assert (num_total_tasks == num_chains * tasks_per_chain)
    print("task placement breakdown: total=", task_placement_stats)
Exemplo n.º 6
0
def TEST_run_tasks_nodep(num_tasks):
    """Run ``num_tasks`` independent tasks and check all were accounted for.

    This test is the same as having ``num_tasks`` chains with one task per
    chain, so it submits through ``submit_task_chains`` with that
    ``num_tasks`` x 1 structure. Results are fetched through the
    module-level ``driver`` and element ``[1]`` of each result is tallied.

    Args:
        num_tasks: Number of independent tasks to submit.

    Raises:
        AssertionError: If the number of tallied tasks differs from
            ``num_tasks``.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    task_placement_map = submit_task_chains(num_chains=num_tasks,
                                            tasks_per_chain=1)
    logger.debug("[DRIVER]: task placement information, per chain:")
    task_placement_total = []
    for chain_num in range(len(task_placement_map)):
        task_placement_list = driver.get(task_placement_map[chain_num],
                                         timeout_ms=5000)
        task_placement_total.extend(t[1] for t in task_placement_list)
        # The first argument of logger.debug is the format string, so the
        # values must go through placeholders.
        logger.debug("%s %s", chain_num, task_placement_list)
    logger.debug("task placement overall: %s", task_placement_total)
    # One pass over the results instead of list.count() per distinct value.
    task_placement_stats = list(Counter(task_placement_total).items())
    num_total_tasks = sum(count for _, count in task_placement_stats)
    print("total tasks executed = ", num_total_tasks)
    assert (num_total_tasks == num_tasks)
    print("task placement breakdown: total=", task_placement_stats)
Exemplo n.º 7
0
# Command line: two positional socket names, raylet first.
parser = argparse.ArgumentParser()
parser.add_argument("raylet_socket_name")
parser.add_argument("object_store_socket_name")

if __name__ == "__main__":
    args = parser.parse_args()

    # Connect as a driver rather than as a worker.
    driver = Worker(
        args.raylet_socket_name,
        args.object_store_socket_name,
        is_worker=False,
    )

    # First task: no dependencies, one return value.
    task = ray.local_scheduler.Task(
        ray.local_scheduler.ObjectID(random_string()),
        ray.local_scheduler.ObjectID(random_string()),
        [],
        1,
        ray.local_scheduler.ObjectID(random_string()),
        0,
    )
    logger.debug("submitting %s", task.task_id())
    driver.node_manager_client.submit(task)
    logger.debug("Return values were %s", task.returns())

    # Second task: depends on the return values of the first one.
    task2 = ray.local_scheduler.Task(
        ray.local_scheduler.ObjectID(random_string()),
        ray.local_scheduler.ObjectID(random_string()),
        task.returns(),
        1,
        ray.local_scheduler.ObjectID(random_string()),
        0,
    )
    logger.debug("Submitting dependent task 2 %s", task2.task_id())
    driver.node_manager_client.submit(task2)

    # Fetching the last task's result confirms that both tasks were
    # executed.
    obj = driver.get(task2.returns(), timeout_ms=1000)
Exemplo n.º 8
0
import argparse

import ray
from worker import Worker, logger
from ray.utils import random_string

# Command line: two positional socket names, raylet first.
parser = argparse.ArgumentParser()
parser.add_argument("raylet_socket_name")
parser.add_argument("object_store_socket_name")

if __name__ == '__main__':
    args = parser.parse_args()

    # Connect as a driver rather than as a worker.
    driver = Worker(args.raylet_socket_name,
                    args.object_store_socket_name,
                    is_worker=False)

    # A single task with no dependencies and one return value.
    task1 = ray.local_scheduler.Task(
        ray.local_scheduler.ObjectID(random_string()),
        ray.local_scheduler.ObjectID(random_string()), [], 1,
        ray.local_scheduler.ObjectID(random_string()), 0)
    # Values go through %s placeholders; a bare extra argument makes the
    # logging module fail to format the record.
    logger.debug("submitting %s", task1.task_id())
    driver.node_manager_client.submit(task1)

    logger.debug("Return values were %s", task1.returns())
    print("[DRIVER] Return values were", task1.returns())
    # Make sure the task gets executed and we can get its result.
    obj = driver.get(task1.returns(), timeout_ms=1000)
    print("[DRIVER]: task1 driver.get result ", obj)
Exemplo n.º 9
0
        } for st in stores_d[:int(STORES)]]
        all_st.extend(stores_list)
        logger.info('Got {} stores for {}'.format(len(stores_list), retailer))
    return all_st


# Main method
if __name__ == '__main__':
    logger.info("Started master scraper: " + CELERY_QUEUE +
                " / scraper_type: " + str(SCRAPER_TYPE))
    if SCRAPER_TYPE and len(SCRAPER_TYPE) > 0:
        if SCRAPER_TYPE == 'price' or SCRAPER_TYPE == 'item':
            # Fetch Valid Stores
            sts_to_crawl = request_valid_stores(retailers_to_get,
                                                str(SCRAPER_TYPE))
            logger.debug(sts_to_crawl[0])
            # Number of stores to crawl
            num_stores = range(0, len(sts_to_crawl))
            ms_id = stream_monitor('master',
                                   params=sts_to_crawl[0],
                                   num_stores=len(sts_to_crawl))
            logger.info("Crawling {} stores!".format(len(sts_to_crawl)))
            # Call to crawl all stores async
            for s in num_stores:
                logger.debug("Calling to scrape")
                call_scraper(sts_to_crawl[s], ms_id)
                # call_parallel(sts_to_crawl[s], ms_id)
        elif SCRAPER_TYPE == 'store':
            logger.debug("CALLING STORES")
            ms_id = stream_monitor('master', params={})
            st_id = 1