Example #1
def update_ttls_parent(sources, state_db, args):

    process_names = {
        repr(source): "%s:%d" % (source['host'], source['port'])
        for source in sources
    }

    processes = []
    for source in sources:
        name = process_names[repr(source)]
        process = multiprocessing.Process(target=update_ttls,
                                          name=name,
                                          kwargs=dict(
                                              source=source,
                                              state_path=state_db._path,
                                              seconds=args.seconds))

        process.start()
        processes.append(process)

    # wait for all workers to finish
    utils.wait_for_processes(processes)
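This and the following examples hand the started workers to utils.wait_for_processes, which is not shown on this page. A minimal sketch of what such a helper might do, assuming it simply joins each child and fails if any exited with a non-zero code, follows (this is an assumption for illustration, not the project's actual implementation):

# Hypothetical sketch of a wait_for_processes-style helper (an assumption,
# not the project's actual utils implementation): join every child process
# and abort if any of them exited with a failure code.
import sys


def wait_for_processes(processes):
    failed = []
    for process in processes:
        process.join()                    # block until this child exits
        if process.exitcode != 0:         # non-zero exitcode means the worker died
            failed.append(process.name)
    if failed:
        sys.exit("worker process(es) failed: %s" % ", ".join(failed))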
Example #2
def copy_collection_parent(sources, dest, state_db, args):
    """
    drive the collection copying process by delegating work to a pool of worker processes
    """

    # ensure state db has rows for each source/dest pair
    for source in sources:
        state_db.add_source_and_dest(source, dest)

    # space-pad all process names so that tabular output formats line up
    process_names = {repr(source): "%s:%d" % (source['host'], source['port'])
                     for source in sources}
    process_names['parent'] = PARENT_PROCESS_NAME
    max_process_name_len = max(len(name) for name in process_names.itervalues())
    for key in process_names:
        process_names[key] = string.ljust(process_names[key], max_process_name_len)

    multiprocessing.current_process().name = process_names['parent']

    # -----------------------------------------------------------------------
    # perform initial copy, if it hasn't been done yet
    # -----------------------------------------------------------------------
    in_initial_copy = len(state_db.select_by_state(CopyStateDB.STATE_INITIAL_COPY))
    if in_initial_copy and in_initial_copy < len(sources):
        die("prior attempt at initial copy failed; rerun with --restart")
    if in_initial_copy > 0:
        ensure_empty_dest(dest)

        # each worker process copies one shard
        processes = []
        for source in sources:
            name = process_names[repr(source)]
            process = multiprocessing.Process(target=copier.copy_collection,
                                              name=name,
                                              kwargs=dict(source=source,
                                                          dest=dest,
                                                          state_path=state_db._path,
                                                          percent=args.percent))
            process.start()
            processes.append(process)


        # wait for all workers to finish
        utils.wait_for_processes(processes)

    # -----------------------------------------------------------------------
    # build indices on main process, since that only needs to be done once
    # -----------------------------------------------------------------------
    waiting_for_indices = len(state_db.select_by_state(CopyStateDB.STATE_WAITING_FOR_INDICES))
    if waiting_for_indices and waiting_for_indices < len(sources):
        die("not all initial copies have been completed; rerun with --restart")
    if waiting_for_indices > 0:
        log.info("building indices")
        copier.copy_indexes(sources[0], dest)
        for source in sources:
            state_db.update_state(source, dest, CopyStateDB.STATE_APPLYING_OPLOG)

    # -----------------------------------------------------------------------
    # apply oplogs
    # -----------------------------------------------------------------------
    applying_oplog = state_db.select_by_state(CopyStateDB.STATE_APPLYING_OPLOG)
    if len(applying_oplog) < len(sources):
        die("this shouldn't happen!")

    log.info("starting oplog apply")

    # create worker thread that prints headers for oplog stats on a regular basis;
    # we do this to prevent the visual clutter caused by multiple processes doing this
    #
    # we avoid using gevent in the parent process to avoid weirdness I've seen with fork()ed
    # gevent loops
    header_delay = max(float(20) / len(sources), 10)
    stats_name = string.ljust("stats", max_process_name_len)
    stats_proc = multiprocessing.Process(target=oplog_applier.print_header_worker,
                                         args=(header_delay,),
                                         name=stats_name)
    stats_proc.start()

    # need to isolate calls to gevent here, to avoid forking with monkey-patched modules
    # (which seems to create funkiness)
    processes = []
    for source in sources:
        name = process_names[repr(source)]
        process = multiprocessing.Process(target=oplog_applier.apply_oplog,
                                          name=name,
                                          kwargs=dict(source=source,
                                                      dest=dest,
                                                      percent=args.percent,
                                                      state_path=state_db._path))
        process.start()
        processes.append(process)

    # this should *never* finish
    processes.append(stats_proc)
    utils.wait_for_processes(processes)
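For context, copy_collection_parent expects already-parsed source and destination descriptors plus a CopyStateDB handle. A hedged usage sketch, reusing the URL-parsing pattern from Example #5 below, might look like this (the CopyStateDB constructor and the args.state_db field are assumptions, not confirmed by these examples):

# Usage sketch only; assumes utils and CopyStateDB are importable as in the
# examples, and that CopyStateDB takes a file path.
import os

def run_copy(args):
    dest = utils.parse_mongo_url(args.dest)
    if os.path.exists(args.source):
        sources = utils.parse_source_file(args.source)   # one source per shard
    else:
        sources = [utils.parse_mongo_url(args.source)]
    state_db = CopyStateDB(args.state_db)                # assumed constructor
    copy_collection_parent(sources, dest, state_db, args)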
Example #3
def copy_collection_parent(sources, dest, state_db, args):
    """
    drive the collection copying process by delegating work to a pool of worker processes
    """

    # ensure state db has rows for each source/dest pair
    for source in sources:
        state_db.add_source_and_dest(source, dest)

    # space-pad all process names so that tabular output formats line up
    process_names = {repr(source): "%s:%d" % (source["host"], source["port"]) for source in sources}
    process_names["parent"] = PARENT_PROCESS_NAME
    max_process_name_len = max(len(name) for name in process_names.itervalues())
    for key in process_names:
        process_names[key] = string.ljust(process_names[key], max_process_name_len)

    multiprocessing.current_process().name = process_names["parent"]

    # -----------------------------------------------------------------------
    # perform initial copy, if it hasn't been done yet
    # -----------------------------------------------------------------------
    in_initial_copy = len(state_db.select_by_state(CopyStateDB.STATE_INITIAL_COPY))
    if in_initial_copy and in_initial_copy < len(sources):
        die("prior attempt at initial copy failed; rerun with --restart")
    if in_initial_copy > 0:
        ensure_empty_dest(dest)

        # each worker process copies one shard
        processes = []
        for source in sources:
            name = process_names[repr(source)]
            process = multiprocessing.Process(
                target=copier.copy_collection,
                name=name,
                kwargs=dict(source=source, dest=dest, state_path=state_db._path, percent=args.percent),
            )
            process.start()
            processes.append(process)

        # wait for all workers to finish
        utils.wait_for_processes(processes)

    # -----------------------------------------------------------------------
    # build indices on main process, since that only needs to be done once
    # -----------------------------------------------------------------------
    waiting_for_indices = len(state_db.select_by_state(CopyStateDB.STATE_WAITING_FOR_INDICES))
    if waiting_for_indices and waiting_for_indices < len(sources):
        die("not all initial copies have been completed; rerun with --restart")
    if waiting_for_indices > 0:
        log.info("building indices")
        copier.copy_indexes(sources[0], dest)
        for source in sources:
            state_db.update_state(source, dest, CopyStateDB.STATE_APPLYING_OPLOG)

    # -----------------------------------------------------------------------
    # apply oplogs
    # -----------------------------------------------------------------------
    applying_oplog = state_db.select_by_state(CopyStateDB.STATE_APPLYING_OPLOG)
    if len(applying_oplog) < len(sources):
        die("this shouldn't happen!")

    log.info("starting oplog apply")

    # create worker thread that prints headers for oplog stats on a regular basis;
    # we do this to prevent the visual clutter caused by multiple processes doing this
    #
    # we avoid using gevent in the parent process to avoid weirdness I've seen with fork()ed
    # gevent loops
    header_delay = max(float(20) / len(sources), 10)
    stats_name = string.ljust("stats", max_process_name_len)
    stats_proc = multiprocessing.Process(
        target=oplog_applier.print_header_worker, args=(header_delay,), name=stats_name
    )
    stats_proc.start()

    # need to isolate calls to gevent here, to avoid forking with monkey-patched modules
    # (which seems to create funkiness)
    processes = []
    for source in sources:
        name = process_names[repr(source)]
        process = multiprocessing.Process(
            target=oplog_applier.apply_oplog,
            name=name,
            kwargs=dict(source=source, dest=dest, percent=args.percent, state_path=state_db._path),
        )
        process.start()
        processes.append(process)

    # this should *never* finish
    processes.append(stats_proc)
    utils.wait_for_processes(processes)
Example #4
def copy_collection_parent(manifests, state_db, args):
    """
    drive the collection copying process by delegating work to a pool of worker processes
    """

    # ensure state db has rows for each source/dest pair
    for manifest in manifests:
        state_db.add_manifest(manifest)

    # space-pad all process names so that tabular output formats line up
    process_names = dict([
        (repr(manifest), "%s:%s.%s->%s:%s.%s" %
         (manifest["srchost"], manifest["srcdb"], manifest["srccol"],
          manifest["desthost"], manifest["destdb"], manifest["destcol"]))
        for manifest in manifests
    ])
    process_names['parent'] = PARENT_PROCESS_NAME
    max_process_name_len = max(
        len(name) for name in process_names.itervalues())
    for key in process_names:
        process_names[key] = string.ljust(process_names[key],
                                          max_process_name_len)

    #multiprocessing.current_process().name = process_names['parent']

    # -----------------------------------------------------------------------
    # build indices on main process, since that only needs to be done once
    # -----------------------------------------------------------------------
    waiting_for_indices = len(
        state_db.select_by_state(CopyStateDB.STATE_WAITING_FOR_INDICES))
    if waiting_for_indices and waiting_for_indices < len(manifests):
        log.warn(
            "prior attempt maybe failed; you can rerun from scratch with --restart"
        )

    if waiting_for_indices > 0:
        log.info("building indices")
        copier.copy_indexes(manifests, args.drop)
        for manifest in manifests:
            state_db.update_state(manifest, CopyStateDB.STATE_INITIAL_COPY)

    # -----------------------------------------------------------------------
    # perform initial copy, if it hasn't been done yet
    # -----------------------------------------------------------------------
    in_initial_copy = len(
        state_db.select_by_state(CopyStateDB.STATE_INITIAL_COPY))
    if in_initial_copy and in_initial_copy < len(manifests):
        log.warn(
            "prior attempt maybe failed; you can rerun from scratch with --restart"
        )

    if in_initial_copy > 0:
        # each worker process copies one shard
        processes = []
        for manifest in manifests:
            name = process_names[repr(manifest)]
            process = multiprocessing.Process(target=copier.copy_collection,
                                              name=name,
                                              kwargs=dict(
                                                  manifest=manifest,
                                                  state_path=state_db._path,
                                                  percent=args.percent))
            process.start()
            processes.append(process)

        # wait for all workers to finish
        utils.wait_for_processes(processes)
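Example #4 takes per-collection manifests instead of source/dest pairs. An illustrative manifest, using only the keys that example actually reads (the hostnames and names below are placeholders), could look like this:

# Illustrative manifest; the keys match what copy_collection_parent reads
# above, the values are made up for demonstration.
manifest = {
    "srchost": "shard0.example.com:27017",
    "srcdb": "app",
    "srccol": "users",
    "desthost": "dest.example.com:27017",
    "destdb": "app",
    "destcol": "users",
}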
Example #5
    args = parser.parse_args()

    dest = utils.parse_mongo_url(args.dest)
    if os.path.exists(args.source):
        sources = utils.parse_source_file(args.source)
    else:
        sources = [utils.parse_mongo_url(args.source)]

    if args.ids_file and args.recent_ops:
        raise ValueError(
            "the --ids-file and --recent-ops parameters cannot be combined")

    # finally, compare stuff!
    processes = []
    for source in sources:
        name = "%s:%s" % (source['host'], source['port'])
        process = Process(target=compare_collections,
                          name=name,
                          kwargs=dict(
                              source=source,
                              dest=dest,
                              percent=args.percent,
                              error_bp=args.error_bp,
                              recent_ops=args.recent_ops,
                              ids_file=args.ids_file,
                          ))
        process.start()
        processes.append(process)

    utils.wait_for_processes(processes)
Example #6
def copy_collection_parent(sources, dest, state_db, args):
    """
    drive the collection copying process by delegating work to a pool of worker processes
    """

    # ensure state db has rows for each source/dest pair
    for source in sources:
        state_db.add_source_and_dest(source, dest)

    # space-pad all process names so that tabular output formats line up
    process_names = {
        repr(source): "%s:%d" % (source['host'], source['port'])
        for source in sources
    }
    process_names['parent'] = PARENT_PROCESS_NAME
    max_process_name_len = max(
        len(name) for name in process_names.itervalues())
    for key in process_names:
        process_names[key] = string.ljust(process_names[key],
                                          max_process_name_len)

    multiprocessing.current_process().name = process_names['parent']

    # -----------------------------------------------------------------------
    # perform initial copy, if it hasn't been done yet
    # -----------------------------------------------------------------------
    in_initial_copy = len(
        state_db.select_by_state(CopyStateDB.STATE_INITIAL_COPY))
    if in_initial_copy and in_initial_copy < len(sources):
        die("prior attempt at initial copy failed; rerun with --restart")
    if in_initial_copy > 0:
        ensure_empty_dest(dest)

        # each worker process copies one shard
        processes = []
        for source in sources:
            name = process_names[repr(source)]
            process = multiprocessing.Process(target=copier.copy_collection,
                                              name=name,
                                              kwargs=dict(
                                                  source=source,
                                                  dest=dest,
                                                  state_path=state_db._path,
                                                  percent=args.percent))
            process.start()
            processes.append(process)

        # wait for all workers to finish
        utils.wait_for_processes(processes)

    # -----------------------------------------------------------------------
    # build indices on main process, since that only needs to be done once
    # -----------------------------------------------------------------------
    waiting_for_indices = len(
        state_db.select_by_state(CopyStateDB.STATE_WAITING_FOR_INDICES))
    if waiting_for_indices and waiting_for_indices < len(sources):
        die("not all initial copies have been completed; rerun with --restart")
    if waiting_for_indices > 0:
        log.info("building indices")
        copier.copy_indexes(sources[0], dest)
        for source in sources:
            state_db.update_state(source, dest,
                                  CopyStateDB.STATE_APPLYING_OPLOG)
Example #7
        help='verify documents touched by the last N ops')

    args = parser.parse_args()

    dest = utils.parse_mongo_url(args.dest)
    if os.path.exists(args.source):
        sources = utils.parse_source_file(args.source)
    else:
        sources = [utils.parse_mongo_url(args.source)]

    if args.ids_file and args.recent_ops:
        raise ValueError("the --ids-file and --recent-ops parameters cannot be combined")

    # finally, compare stuff!
    processes = []
    for source in sources:
        name = "%s:%s" % (source['host'], source['port'])
        process = Process(target=compare_collections,
                          name=name,
                          kwargs=dict(
                            source=source,
                            dest=dest,
                            percent=args.percent,
                            error_bp=args.error_bp,
                            recent_ops=args.recent_ops,
                            ids_file=args.ids_file,
                          ))
        process.start()
        processes.append(process)

    utils.wait_for_processes(processes)