Example #1
File: main.py Project: vishalbelsare/ray
def validate_part(path: Path):
    logging_utils.init()
    sum_path = path + ".sum"
    _run_valsort(["-o", sum_path, path])
    logging.info(f"Validated output {path}")
    with open(sum_path, "rb") as fin:
        # Return the part's byte size together with its valsort checksum.
        return os.path.getsize(path), fin.read()
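The _run_valsort helper is not shown in this snippet. A minimal sketch of what it plausibly does, inferred from Example #2's direct call to the valsort binary (an inference, not the project's actual code; subprocess, logging, and the constants module are assumed to be imported):

# Hypothetical helper, inferred from Example #2; not the project's actual code.
def _run_valsort(cli_args: List[str]):
    proc = subprocess.run([constants.VALSORT_PATH] + cli_args,
                          capture_output=True)
    if proc.returncode != 0:
        logging.critical("\n" + proc.stderr.decode("ascii"))
        raise RuntimeError(f"valsort failed: {cli_args}")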
Example #2
def validate_part(path: Path):
    logging_utils.init()
    proc = subprocess.run([constants.VALSORT_PATH, path], capture_output=True)
    if proc.returncode != 0:
        logging.critical("\n" + proc.stderr.decode("ascii"))
        raise RuntimeError(f"Validation failed: {path}")
    logging.info(f"Validated output {path}")
Example #3
File: main.py Project: vishalbelsare/ray
def mapper(args: Args, mapper_id: PartId, boundaries: List[int],
           path: Path) -> List[np.ndarray]:
    logging_utils.init()
    part = _load_partition(args, path)
    sort_fn = (_dummy_sort_and_partition
               if args.skip_sorting else sortlib.sort_and_partition)
    blocks = sort_fn(part, boundaries)
    return [part[offset:offset + size] for offset, size in blocks]
Example #4
File: main.py Project: vishalbelsare/ray
def generate_part(args: Args, part_id: PartId, size: RecordCount,
                  offset: RecordCount) -> PartInfo:
    logging_utils.init()
    pinfo = _part_info(args, part_id)
    subprocess.run(
        [constants.GENSORT_PATH, f"-b{offset}", f"{size}", pinfo.path],
        check=True)
    logging.info(f"Generated input {pinfo}")
    return pinfo
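gensort's -b flag sets the record number at which generation starts, so handing each task a disjoint offset yields non-overlapping slices of the input. A sketch of the fan-out under the assumption that generate_part is declared as a Ray task (args.part_size and args.num_parts appear in Example #9; the offset arithmetic is illustrative):

# Illustrative driver; assumes generate_part is a Ray task.
parts = [
    generate_part.remote(args, part_id,
                         args.part_size,             # records per part
                         part_id * args.part_size)   # disjoint starting record
    for part_id in range(args.num_parts)
]
part_infos = ray.get(parts)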
Example #5
File: main.py Project: yiranwang52/ray
def init():
    if args.ray_address is None:
        ray.init()
    else:
        ray.init(address=args.ray_address)
    logging_utils.init()
    logging.info(args)
    logging.info(ray.available_resources())
    os.makedirs(constants.WORK_DIR, exist_ok=True)
Example #6
def generate_part(part_id: PartId, size: RecordCount,
                  offset: RecordCount) -> PartitionInfo:
    logging_utils.init()
    pinfo = _make_partition_info(part_id)
    if not args.skip_input:
        subprocess.run(
            [constants.GENSORT_PATH, f"-b{offset}", f"{size}", pinfo.path],
            check=True)
        logging.info(f"Generated input {pinfo}")
    return pinfo
Example #7
def __init__(
    self,
    gauges: List[str],
    histograms: List[Tuple[str, List[int]]],
):
    self.counts = {m: 0 for m in gauges}
    self.gauges = {m: Gauge(m) for m in gauges}
    self.reset_gauges()
    self.histograms = {m: Histogram(m, boundaries=b) for m, b in histograms}
    logging_utils.init()
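Gauge and Histogram here match the ray.util.metrics constructors, where a Histogram takes a list of bucket boundaries. A hypothetical instantiation of the enclosing class (the class name ProgressTracker and the metric names are assumptions):

# Hypothetical usage; the class name and metric names are illustrative only.
tracker = ProgressTracker(
    gauges=["parts_in_progress", "parts_done"],
    histograms=[("map_time_s", [1, 2, 5, 10, 30, 60])],
)
tracker.histograms["map_time_s"].observe(3.2)  # record one mapper's duration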
Example #8
File: main.py Project: vishalbelsare/ray
def init(args: Args):
    if not args.ray_address:
        ray.init(resources={"worker": os.cpu_count()})
    else:
        ray.init(address=args.ray_address)
    logging_utils.init()
    logging.info(args)
    os.makedirs(constants.WORK_DIR, exist_ok=True)
    resources = ray.cluster_resources()
    logging.info(resources)
    args.num_workers = resources["worker"]
    progress_tracker = tracing_utils.create_progress_tracker(args)
    return progress_tracker
Example #9
def mapper(boundaries: List[int], mapper_id: PartId,
           path: Path) -> List[ray.ObjectRef]:
    logging_utils.init()
    task_id = f"M-{mapper_id} Mapper"
    logging.info(f"{task_id} starting")
    if args.skip_input:
        # No real input on disk: synthesize random blocks of the expected size.
        block_size = int(np.ceil(args.part_size / args.num_parts))
        return [
            ray.put(np.frombuffer(np.random.bytes(block_size), dtype=np.uint8))
            for _ in range(args.num_parts)
        ]

    part = _load_partition(path)
    sort_fn = (_dummy_sort_and_partition
               if args.skip_sorting else sortlib.sort_and_partition)
    blocks = sort_fn(part, boundaries)
    logging.info(f"{task_id} saving to object store")
    return [ray.put(part[offset:offset + size]) for offset, size in blocks]
Example #10
def reducer(reducer_id: PartId, *blocks) -> PartitionInfo:
    logging_utils.init()
    task_id = f"R-{reducer_id} Reducer"
    logging.info(f"{task_id} starting")
    blocks = [np.copy(ray.get(block)) for block in blocks]
    merge_fn = _dummy_merge if args.skip_sorting else sortlib.merge_partitions
    merger = merge_fn(blocks, args.reducer_batch_num_records)
    pinfo = _make_partition_info(reducer_id, "output")
    if args.skip_output:
        # Drain the merged stream without writing, so the merge still runs.
        total = 0
        for datachunk in merger:
            total += len(datachunk)
    else:
        with open(pinfo.path, "wb") as fout:
            for datachunk in merger:
                fout.write(datachunk)
    logging.info(f"{task_id} done")
    return pinfo
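Examples #9 and #10 compose into a shuffle: mapper m returns one block per reducer, and reducer r gathers block r from every mapper. A sketch of that wiring, assuming both functions are Ray tasks and that boundaries, input_paths, and num_reducers are defined elsewhere (all driver-side names are assumptions):

# Illustrative shuffle; task declarations and driver-side names are assumptions.
map_out = ray.get([
    mapper.remote(boundaries, m, path)
    for m, path in enumerate(input_paths)
])  # each entry is a list of block ObjectRefs, one per reducer
results = ray.get([
    reducer.remote(r, *[blocks[r] for blocks in map_out])
    for r in range(num_reducers)
])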
Example #11
def init():
    ray.init(address="auto")
    logging_utils.init()
    logging.info(args)
    logging.info(ray.available_resources())
    os.makedirs(constants.WORK_DIR, exist_ok=True)