def setUp(self) -> None:
    """Create a scratch directory and open a fresh LocalMephistoDB inside it."""
    scratch_dir = tempfile.mkdtemp()
    self.data_dir = scratch_dir
    self.db = LocalMephistoDB(os.path.join(scratch_dir, "mephisto.db"))
def setUp(self):
    """Build a temporary database, deploy a mock architect and launch mock units."""
    # Throwaway on-disk database
    self.data_dir = tempfile.mkdtemp()
    self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))

    # Task and task run used by the rest of the fixture
    self.task_id = self.db.new_task("test_mock", MockBlueprint.BLUEPRINT_TYPE)
    self.task_run_id = get_test_task_run(self.db)
    self.task_run = TaskRun(self.db, self.task_run_id)

    # Mock architect with should_run_server enabled
    mock_architect_args = MockArchitectArgs(should_run_server=True)
    architect_config = OmegaConf.structured(
        MephistoConfig(architect=mock_architect_args)
    )
    self.architect = MockArchitect(
        self.db,
        architect_config,
        EMPTY_STATE,
        self.task_run,
        self.data_dir,
    )
    self.architect.prepare()
    self.architect.deploy()
    self.urls = self.architect._get_socket_urls()  # FIXME
    self.url = self.urls[0]

    # Mock provider prepared against the first socket url
    self.provider = MockProvider(self.db)
    self.provider.setup_resources_for_task_run(
        self.task_run,
        self.task_run.args,
        self.url,
    )

    # Launcher with its assignments created and units launched
    self.launcher = TaskLauncher(
        self.db,
        self.task_run,
        self.get_mock_assignment_data_array(),
    )
    self.launcher.create_assignments()
    self.launcher.launch_units(self.url)
    self.sup = None
def list_requesters():
    """Lists all registered requesters"""
    from mephisto.core.local_database import LocalMephistoDB
    from tabulate import tabulate

    # One dict per requester; the dict keys become the table headers
    rows = []
    for requester in LocalMephistoDB().find_requesters():
        rows.append(requester.to_dict())
    table = tabulate(rows, headers="keys")
    click.echo(table)
def setUp(self) -> None:
    """
    Print the provider-account notice once, then create a temporary database.

    Sets ``self.data_dir`` to a new temporary directory and ``self.db`` to a
    LocalMephistoDB stored at ``mephisto.db`` inside it.
    """
    if not self.warned_about_setup:
        print(
            "Provider tests require using a test account for that crowd provider, "
            "you may need to set this up on your own."
        )
        # unittest creates a new TestCase instance for every test method, so
        # the flag has to live on the class for the notice to print only once.
        type(self).warned_about_setup = True
    self.data_dir = tempfile.mkdtemp()
    database_path = os.path.join(self.data_dir, "mephisto.db")
    self.db = LocalMephistoDB(database_path)
def setUp(self) -> None:
    """
    Prepare per-test state: two temporary directories, a database, a task run,
    and shortcuts to the classes exposed by ``self.BlueprintClass``.
    """
    self.data_dir, self.build_dir = tempfile.mkdtemp(), tempfile.mkdtemp()
    self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))

    # TODO(#97) we need to actually pull the task type from the Blueprint
    test_task_run_id = get_test_task_run(self.db)
    self.task_run = TaskRun(self.db, test_task_run_id)

    # TODO(#97) create a mock agent with the given task type?
    blueprint_cls = self.BlueprintClass
    self.TaskRunnerClass = blueprint_cls.TaskRunnerClass
    self.AgentStateClass = blueprint_cls.AgentStateClass
    self.TaskBuilderClass = blueprint_cls.TaskBuilderClass
def register_provider(args):
    """Register a requester with a crowd provider"""
    if not args:
        click.echo(
            "Usage: mephisto register <provider_type> --arg1:value --arg2:value"
        )
        return

    from mephisto.core.local_database import LocalMephistoDB
    from mephisto.core.registry import get_crowd_provider_from_type
    from mephisto.core.argparse_parser import parse_arg_dict, get_extra_argument_dicts

    provider_type, requester_args = args[0], args[1:]

    # Split each `key:value` argument on the FIRST colon only, so values that
    # themselves contain a colon (urls, paths, ...) are kept intact.
    args_dict = {}
    for arg in requester_args:
        key, separator, value = arg.partition(":")
        if not separator:
            click.echo(
                f"Could not parse argument `{arg}`, expected the form --arg:value"
            )
            return
        args_dict[key] = value
    transformed = {
        key: {"option_string": key, "value": value}
        for key, value in args_dict.items()
    }

    crowd_provider = get_crowd_provider_from_type(provider_type)
    RequesterClass = crowd_provider.RequesterClass

    if len(requester_args) == 0:
        # No requester arguments given: list what this provider accepts.
        from tabulate import tabulate

        params = get_extra_argument_dicts(RequesterClass)
        for param in params:
            click.echo(param["desc"])
            click.echo(tabulate(param["args"].values(), headers="keys"))
        return

    try:
        parsed_options = parse_arg_dict(RequesterClass, transformed)
    except Exception as e:
        # Report the parse problem and stop; continuing would hit an unbound
        # `parsed_options` below.
        click.echo(str(e))
        return

    if "name" not in parsed_options:
        # Without a name the lookup below cannot proceed.
        click.echo("No name was specified for the requester.")
        return

    db = LocalMephistoDB()
    requesters = db.find_requesters(requester_name=parsed_options["name"])
    if len(requesters) == 0:
        requester = RequesterClass.new(db, parsed_options["name"])
    else:
        requester = requesters[0]
    try:
        requester.register(parsed_options)
        click.echo("Registered successfully.")
    except Exception as e:
        click.echo(str(e))
class TestLocalMephistoDB(BaseDatabaseTests):
    """
    Runs the inherited BaseDatabaseTests suite against a LocalMephistoDB.

    No tests are defined here; only the database fixture is supplied.
    """

    def setUp(self):
        """Open a LocalMephistoDB inside a new temporary directory."""
        scratch_dir = tempfile.mkdtemp()
        self.data_dir = scratch_dir
        self.db = LocalMephistoDB(os.path.join(scratch_dir, "mephisto.db"))

    def tearDown(self):
        """Shut the database down and remove its temporary directory."""
        self.db.shutdown()
        shutil.rmtree(self.data_dir)
def _set_up_config(
    self,
    blueprint_type: str,
    task_directory: str,
    overrides: Optional[List[str]] = None,
):
    """
    Compose the Hydra test config and create a temporary database for it.

    The config is built with the Hydra compose() API, and the database lives in
    a freshly created temporary directory.

    :param blueprint_type: string uniquely specifying Blueprint class
    :param task_directory: directory containing the `conf/` configuration
        folder. Will be injected as `${task_dir}` in YAML files.
    :param overrides: additional config overrides
    """
    # Hydra is given the config folder relative to this file
    this_dir = os.path.dirname(__file__)
    config_dir = os.path.join(os.path.relpath(task_directory, this_dir), 'conf')
    extra_overrides = [] if overrides is None else overrides

    with initialize(config_path=config_dir):
        base_overrides = [
            f'+mephisto.blueprint._blueprint_type={blueprint_type}',
            '+mephisto/architect=mock',
            '+mephisto/provider=mock',
            f'+task_dir={task_directory}',
            f'+current_time={int(time.time())}',
        ]
        self.config = compose(
            config_name="example",
            overrides=base_overrides + extra_overrides,
        )
        # TODO: when Hydra 1.1 is released with support for recursive defaults,
        #  don't manually specify all missing blueprint args anymore, but
        #  instead define the blueprint in the defaults list directly.
        #  Currently, the blueprint can't be set in the defaults list without
        #  overriding params in the YAML file, as documented at
        #  https://github.com/facebookresearch/hydra/issues/326 and as fixed in
        #  https://github.com/facebookresearch/hydra/pull/1044.

    # Temporary database, then fold database-derived values into the config
    self.data_dir = tempfile.mkdtemp()
    self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))
    self.config = augment_config_from_db(self.config, self.db)
    self.config.mephisto.architect.should_run_server = True
class TestMTurkComponents(unittest.TestCase):
    """
    Tests for individual pieces of the MTurk crowd provider
    """

    def setUp(self) -> None:
        """Open a database inside a new temporary directory."""
        scratch_dir = tempfile.mkdtemp()
        self.data_dir = scratch_dir
        self.db = LocalMephistoDB(os.path.join(scratch_dir, "mephisto.db"))

    def tearDown(self) -> None:
        """Shut the database down and remove the temporary directory."""
        self.db.shutdown()
        shutil.rmtree(self.data_dir)

    @pytest.mark.req_creds
    def test_create_and_find_worker(self) -> None:
        """A created MTurk worker is retrievable by db id and by MTurk id"""
        db = self.db
        TEST_MTURK_WORKER_ID = "ABCDEFGHIJ"
        created = MTurkWorker.new(db, TEST_MTURK_WORKER_ID)

        # Lookup through the generic Worker class using the database id
        by_db_id = Worker(db, created.db_id)
        self.assertEqual(
            created.worker_name,
            by_db_id.worker_name,
            "Worker gotten from db not same as first init",
        )

        # Lookup through the MTurk worker id
        by_mturk_id = MTurkWorker.get_from_mturk_worker_id(db, TEST_MTURK_WORKER_ID)
        self.assertEqual(
            created.worker_name,
            by_mturk_id.worker_name,
            "Worker gotten from db not same as first init",
        )

        # An id that was never registered must not resolve to a worker
        failed_worker = MTurkWorker.get_from_mturk_worker_id(db, "FAKE_ID")
        self.assertIsNone(
            failed_worker, f"Found worker {failed_worker} from a fake id"
        )
def setUp(self) -> None:
    """
    Skip when no ArchitectClass is set; otherwise build the test fixtures.

    Creates a temporary database (``self.data_dir`` / ``self.db``), a temporary
    build directory (``self.build_dir``) and a test task run
    (``self.task_run``), then runs a MockTaskBuilder into the build directory.

    :raises unittest.SkipTest: if ``self.ArchitectClass`` is not defined
    """
    # Only a missing attribute means "no architect configured"; a bare
    # `except:` here would also swallow KeyboardInterrupt and real errors.
    if not hasattr(self, "ArchitectClass"):
        raise unittest.SkipTest("Skipping test as no ArchitectClass set")
    if not self.warned_about_setup:
        print(
            "Architect tests may require using an account with the server provider "
            "in order to function properly. Make sure these are configured before testing."
        )
        # unittest creates a new TestCase instance for every test method, so
        # the flag has to live on the class for the notice to print only once.
        type(self).warned_about_setup = True
    self.data_dir = tempfile.mkdtemp()
    database_path = os.path.join(self.data_dir, "mephisto.db")
    self.db = LocalMephistoDB(database_path)
    self.build_dir = tempfile.mkdtemp()
    self.task_run = TaskRun(self.db, get_test_task_run(self.db))
    builder = MockTaskBuilder(self.task_run, {})
    builder.build_in_dir(self.build_dir)
def get_db_from_config(cfg: DictConfig) -> "MephistoDB":
    """
    Build the MephistoDB described by ``cfg``.

    Reads ``cfg.mephisto.datapath`` and falls back to ``get_root_data_dir()``
    when it is unset. The result is currently always a LocalMephistoDB stored
    at ``database.db`` under that directory.
    """
    configured_root = cfg.mephisto.get("datapath", None)
    data_root = get_root_data_dir() if configured_root is None else configured_root
    return LocalMephistoDB(database_path=os.path.join(data_root, "database.db"))
def check():
    """Checks that mephisto is setup correctly"""
    from mephisto.core.local_database import LocalMephistoDB
    from mephisto.core.utils import get_mock_requester

    # Opening the default database and fetching the mock requester is the
    # whole check; any exception is reported instead of propagated.
    try:
        get_mock_requester(LocalMephistoDB())
    except Exception as err:
        click.echo("Something went wrong.")
        click.echo(err)
    else:
        click.echo("Mephisto seems to be set up correctly.")
class CrowdProviderTests(unittest.TestCase):
    """
    Base suite of data model tests that a crowd provider should pass.

    Subclasses set ``CrowdProviderClass`` and implement the ``get_test_*``
    helpers. Running this class directly is skipped (see ``setUpClass``).
    """

    CrowdProviderClass: Type[CrowdProvider]
    db: MephistoDB
    data_dir: str

    # Class-level flag so the setup notice is printed at most once per class
    warned_about_setup = False

    @classmethod
    def setUpClass(cls):
        """
        Only run tests on subclasses of this class, as this class is just
        defining the testing interface and the tests to run on a DB that
        adheres to that interface
        """
        if cls is CrowdProviderTests:
            raise unittest.SkipTest("Skip CrowdProviderTests tests, it's a base class")
        super(CrowdProviderTests, cls).setUpClass()

    def get_test_worker_name(self) -> str:
        """Return a worker name that is usable for testing with this crowdprovider"""
        raise NotImplementedError()

    def get_test_requester_name(self) -> str:
        """Return a requester name that is usable for testing with this crowdprovider"""
        raise NotImplementedError()

    def get_test_requester_balance(self, requester_name: str) -> float:
        """Get the amount that test accounts are expected to have"""
        raise NotImplementedError()

    def setUp(self) -> None:
        """
        Print the provider-account notice once, then create a temporary
        directory and a LocalMephistoDB inside it for the test to use.
        """
        if not self.warned_about_setup:
            print(
                "Provider tests require using a test account for that crowd provider, "
                "you may need to set this up on your own."
            )
            # unittest creates a new TestCase instance for every test method,
            # so the flag has to be stored on the class to persist across tests.
            type(self).warned_about_setup = True
        self.data_dir = tempfile.mkdtemp()
        database_path = os.path.join(self.data_dir, "mephisto.db")
        self.db = LocalMephistoDB(database_path)

    def tearDown(self) -> None:
        """
        tearDown should clear up anything that was set up or used in any of the
        tests in this class.

        Generally this means cleaning up the database that was set up.
        """
        self.db.shutdown()
        shutil.rmtree(self.data_dir)

    def get_test_requester(self) -> Requester:
        """Create a requester to use in tests"""
        db: MephistoDB = self.db
        RequesterClass = self.CrowdProviderClass.RequesterClass
        return RequesterClass.new(db, self.get_test_requester_name())

    def test_init_registers_datastore(self) -> None:
        """Ensure that initializing the crowd provider registers
        a datastore with the database, as this is required functionality
        for all crowd providers.
        """
        ProviderClass = self.CrowdProviderClass
        self.assertFalse(
            self.db.has_datastore_for_provider(ProviderClass.PROVIDER_TYPE)
        )
        # Initialize the provider; only the side effect of construction matters
        ProviderClass(self.db)
        self.assertTrue(self.db.has_datastore_for_provider(ProviderClass.PROVIDER_TYPE))

    def test_init_object_registers_datastore(self) -> None:
        """Ensure that creating a requester for the crowd provider registers
        a datastore with the database, without the provider itself having
        been initialized first.
        """
        ProviderClass = self.CrowdProviderClass
        self.assertFalse(
            self.db.has_datastore_for_provider(ProviderClass.PROVIDER_TYPE)
        )
        # Initialize the requester; only the side effect of creation matters
        RequesterClass = ProviderClass.RequesterClass
        RequesterClass.new(self.db, self.get_test_requester_name())
        self.assertTrue(self.db.has_datastore_for_provider(ProviderClass.PROVIDER_TYPE))

    def test_requester(self) -> None:
        """Ensure we can create and use a requester"""
        db: MephistoDB = self.db
        RequesterClass = self.CrowdProviderClass.RequesterClass
        test_requester = RequesterClass.new(db, self.get_test_requester_name())
        test_requester_2 = Requester(db, test_requester.db_id)
        self.assertEqual(
            test_requester.requester_name,
            test_requester_2.requester_name,
            "Requester gotten from db not same as first init",
        )

        # Ensure credential registration works
        # TODO(#97) ensure registration fails when we programatically login to an account
        # in the future
        # self.assertFalse(test_requester.is_registered())
        test_requester.register()
        self.assertTrue(test_requester.is_registered())

        # Ensure requester methods work
        avail_budget = test_requester.get_available_budget()
        test_budget = self.get_test_requester_balance(test_requester.requester_name)
        self.assertEqual(
            avail_budget,
            test_budget,
            "Queried budget from `get_available_budget` not equal to `test_budget`",
        )

    def test_worker(self) -> None:
        """Ensure we can query and use a worker"""
        db: MephistoDB = self.db
        requester = self.get_test_requester()
        WorkerClass = self.CrowdProviderClass.WorkerClass
        test_worker = WorkerClass.new(db, self.get_test_worker_name())
        test_worker_2 = Worker(db, test_worker.db_id)
        self.assertEqual(
            test_worker.worker_name,
            test_worker_2.worker_name,
            "Worker gotten from db not same as first init",
        )

        # Ensure blocking is doable
        test_worker.block_worker("Test reason", requester=requester)
        self.assertTrue(test_worker.is_blocked(requester))
        test_worker.unblock_worker("Test reason", requester=requester)
        self.assertFalse(test_worker.is_blocked(requester))
def parse_launch_arguments(
    self, args=None
) -> Tuple[str, str, "MephistoDB", Dict[str, Any]]:
    """
    Parse the common launch arguments out of the command line.

    Defaults to a mock requester (and a `local` architect) when nothing is
    specified.

    :param args: argument list forwarded to ``parse_known_args``
    :return: tuple of (architect type, requester name, the MephistoDB to run
        with, dict of all parsed arguments)
    :raises SystemExit: with code 1 when no matching requester is registered
    """
    # Local import keeps this method self-contained. `sys.exit` is used instead
    # of the `site`-provided `exit()` helper, which is meant for interactive
    # sessions and is absent when the interpreter runs with `-S`.
    import sys

    args, _unknown = self.parse_known_args(args=args)
    arg_dict = vars(args)
    requester_name = arg_dict["requester_name"]
    provider_type = arg_dict["provider_type"]
    architect_type = arg_dict["architect_type"]
    datapath = arg_dict["datapath"]

    if datapath is None:
        datapath = get_root_data_dir()

    database_path = os.path.join(datapath, "database.db")
    db = LocalMephistoDB(database_path=database_path)

    if requester_name is None:
        if provider_type is None:
            print("No requester specified, defaulting to mock")
            provider_type = "mock"
        if provider_type == "mock":
            req = get_mock_requester(db)
            requester_name = req.requester_name
        else:
            reqs = db.find_requesters(provider_type=provider_type)
            # TODO (#93) proper logging
            if len(reqs) == 0:
                print(
                    f"No requesters found for provider type {provider_type}, please "
                    f"register one. You can register with `mephisto register {provider_type}`, "
                    f"or `python mephisto/client/cli.py register {provider_type}` if you haven't "
                    "installed Mephisto using poetry."
                )
                sys.exit(1)
            elif len(reqs) == 1:
                req = reqs[0]
                requester_name = req.requester_name
                print(
                    f"Found one `{provider_type}` requester to launch with: {requester_name}"
                )
            else:
                # Several candidates: take the last one returned
                req = reqs[-1]
                requester_name = req.requester_name
                print(
                    f"Found many `{provider_type}` requesters to launch with, "
                    f"choosing the most recent: {requester_name}"
                )
    else:
        # Ensure provided requester exists
        reqs = db.find_requesters(requester_name=requester_name)
        if len(reqs) == 0:
            print(
                f"No requesters found under name {requester_name}, "
                "have you registered with `mephisto register`?"
            )
            sys.exit(1)
        # The requester's own provider type overrides any value passed in
        provider_type = reqs[0].provider_type

    # provider type and requester name now set, ensure architect
    if architect_type is None:
        default_architects = {
            "mock": "local",
            "mturk_sandbox": "heroku",
            "mturk": "heroku",
        }
        # Any provider not listed above falls back to the local architect
        architect_type = default_architects.get(provider_type, "local")

        # TODO (#93) proper logging
        print(
            f"No architect specified, defaulting to architect "
            f"`{architect_type}` for provider `{provider_type}`"
        )

    if provider_type in ["mturk"]:
        # Pause for confirmation before launching on this provider
        input(
            f"This task is going to launch live on {provider_type}, press enter to continue: "
        )

    return architect_type, requester_name, db, arg_dict
def setUp(self):
    """Create a temporary database and a test task run stored in it."""
    scratch_dir = tempfile.mkdtemp()
    self.data_dir = scratch_dir
    self.db = LocalMephistoDB(os.path.join(scratch_dir, "mephisto.db"))

    run_id = get_test_task_run(self.db)
    self.task_run_id = run_id
    self.task_run = TaskRun(self.db, run_id)
class TestTaskLauncher(unittest.TestCase):
    """
    Unit testing for the Mephisto TaskLauncher
    """

    def setUp(self):
        """Create a temporary database and a test task run to launch against."""
        self.data_dir = tempfile.mkdtemp()
        database_path = os.path.join(self.data_dir, "mephisto.db")
        self.db = LocalMephistoDB(database_path)
        self.task_run_id = get_test_task_run(self.db)
        self.task_run = TaskRun(self.db, self.task_run_id)

    def tearDown(self):
        """Shut the database down and delete the temporary directory."""
        self.db.shutdown()
        shutil.rmtree(self.data_dir)

    @staticmethod
    def get_mock_assignment_data_array() -> List[InitializationData]:
        """Return a one-element list holding the MockTaskRunner's mock assignment data."""
        return [MockTaskRunner.get_mock_assignment_data()]

    @staticmethod
    def get_mock_assignment_data_generator() -> Iterable[InitializationData]:
        """
        Yield NUM_GENERATED_ASSIGNMENTS mock assignments, sleeping
        WAIT_TIME_TILL_NEXT_ASSIGNMENT seconds after each one.
        """
        for _ in range(NUM_GENERATED_ASSIGNMENTS):
            yield MockTaskRunner.get_mock_assignment_data()
            time.sleep(WAIT_TIME_TILL_NEXT_ASSIGNMENT)

    def test_init_on_task_run(self):
        """Initialize a launcher on a task_run"""
        launcher = TaskLauncher(self.db, self.task_run,
                                self.get_mock_assignment_data_array())
        self.assertEqual(self.db, launcher.db)
        self.assertEqual(self.task_run, launcher.task_run)
        # Nothing is created until create_assignments() is called
        self.assertEqual(len(launcher.assignments), 0)
        self.assertEqual(len(launcher.units), 0)
        self.assertEqual(launcher.provider_type, MockProvider.PROVIDER_TYPE)

    def test_create_launch_expire_assignments(self):
        """Initialize a launcher on a task run, then create, launch and expire the assignments"""
        mock_data_array = self.get_mock_assignment_data_array()
        launcher = TaskLauncher(self.db, self.task_run, mock_data_array)
        launcher.create_assignments()

        self.assertEqual(
            len(launcher.assignments),
            len(mock_data_array),
            "Inequal number of assignments existed than were launched",
        )
        # Expected unit count uses the first assignment's "unit_data" length
        self.assertEqual(
            len(launcher.units),
            len(mock_data_array) * len(mock_data_array[0]["unit_data"]),
            "Inequal number of units created than were expected",
        )

        # Freshly created units and assignments report CREATED
        for unit in launcher.units:
            self.assertEqual(unit.get_db_status(), AssignmentState.CREATED)
        for assignment in launcher.assignments:
            self.assertEqual(assignment.get_status(), AssignmentState.CREATED)

        launcher.launch_units("dummy-url:3000")

        # After launching, everything should report LAUNCHED
        for unit in launcher.units:
            self.assertEqual(unit.get_db_status(), AssignmentState.LAUNCHED)
            time.sleep(WAIT_TIME_TILL_NEXT_UNIT)
        for assignment in launcher.assignments:
            self.assertEqual(assignment.get_status(), AssignmentState.LAUNCHED)

        launcher.expire_units()

        # After expiring, everything should report EXPIRED
        for unit in launcher.units:
            self.assertEqual(unit.get_db_status(), AssignmentState.EXPIRED)
        for assignment in launcher.assignments:
            self.assertEqual(assignment.get_status(), AssignmentState.EXPIRED)

    def test_launch_assignments_with_concurrent_unit_cap(self):
        """Launch units under several concurrency caps and check the cap is never exceeded"""
        cap_values = [1, 2, 3, 4, 5]
        for max_num_units in cap_values:
            mock_data_array = self.get_mock_assignment_data_array()
            launcher = TaskLauncher(
                self.db,
                self.task_run,
                mock_data_array,
                max_num_concurrent_units=max_num_units,
            )
            # Swap in a LimitedDict so exceeding the cap is recorded on it
            launcher.launched_units = LimitedDict(
                launcher.max_num_concurrent_units)
            launcher.create_assignments()
            launcher.launch_units("dummy-url:3000")

            start_time = time.time()
            # Keep completing launched units until every unit reports COMPLETED
            while set([u.get_status() for u in launcher.units
                       ]) != {AssignmentState.COMPLETED}:
                for unit in launcher.units:
                    if unit.get_status() == AssignmentState.LAUNCHED:
                        unit.set_db_status(AssignmentState.COMPLETED)
                    time.sleep(0.1)
                self.assertEqual(launcher.launched_units.exceed_limit, False)
                curr_time = time.time()
                self.assertLessEqual(curr_time - start_time,
                                     MAX_WAIT_TIME_UNIT_LAUNCH)
            launcher.expire_units()
            # Rebuild the database and task run before trying the next cap
            self.tearDown()
            self.setUp()

    def test_assignments_generator(self):
        """Initialize a launcher on a task run, then try generate the assignments"""
        mock_data_array = self.get_mock_assignment_data_generator()

        start_time = time.time()
        launcher = TaskLauncher(self.db, self.task_run, mock_data_array)
        launcher.create_assignments()
        end_time = time.time()
        # create_assignments() must return in at most half the time the
        # generator spends sleeping across all of its assignments
        self.assertLessEqual(
            end_time - start_time,
            (NUM_GENERATED_ASSIGNMENTS * WAIT_TIME_TILL_NEXT_ASSIGNMENT) / 2,
        )
def setUp(self):
    """Create a temporary database, register a test requester, and clear the operator slot."""
    scratch_dir = tempfile.mkdtemp()
    self.data_dir = scratch_dir
    self.db = LocalMephistoDB(os.path.join(scratch_dir, "mephisto.db"))

    # Only the requester's name is kept; the second returned value is unused
    requester_info = get_test_requester(self.db)
    self.requester_name, _req_id = requester_info
    self.operator = None
class TestOperator(unittest.TestCase):
    """
    Unit testing for the Mephisto Operator
    """

    def setUp(self):
        """Create a temporary database and a test requester; no operator exists yet."""
        self.data_dir = tempfile.mkdtemp()
        database_path = os.path.join(self.data_dir, "mephisto.db")
        self.db = LocalMephistoDB(database_path)
        self.requester_name, _req_id = get_test_requester(self.db)
        self.operator = None

    def tearDown(self):
        """
        Shut down the operator (if a test created one) and the database, remove
        the temporary directory, and check that only one thread is still alive.
        """
        if self.operator is not None:
            self.operator.shutdown()
        self.db.shutdown()
        shutil.rmtree(self.data_dir)
        self.assertTrue(
            len(threading.enumerate()) == 1,
            f"Expected only main thread at teardown, found {threading.enumerate()}",
        )

    def wait_for_complete_assignment(self, assignment, timeout: int):
        """
        Poll every 0.1s until `assignment` reports COMPLETED, and fail the test
        if `timeout` seconds have elapsed by the time polling stops.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            if assignment.get_status() == AssignmentState.COMPLETED:
                break
            time.sleep(0.1)
        self.assertLess(time.time() - start_time, timeout,
                        "Assignment not completed in time")

    def await_server_start(self, architect: "MockArchitect"):
        """
        Poll every 0.1s, for up to 5 seconds, until the architect's server has
        at least one entry in `subs`; fail the test otherwise.
        """
        start_time = time.time()
        assert architect.server is not None, "Cannot wait on empty server"
        while time.time() - start_time < 5:
            if len(architect.server.subs) > 0:
                break
            time.sleep(0.1)
        self.assertLess(time.time() - start_time, 5,
                        "Mock server not up in time")

    def test_initialize_supervisor(self):
        """Quick test to ensure that the operator can be initialized"""
        self.operator = Operator(self.db)

    def test_run_job_concurrent(self):
        """Run a single concurrent mock assignment through the operator to completion"""
        self.operator = Operator(self.db)
        config = MephistoConfig(
            blueprint=MockBlueprintArgs(
                num_assignments=1,
                is_concurrent=True,
            ),
            provider=MockProviderArgs(requester_name=self.requester_name, ),
            architect=MockArchitectArgs(should_run_server=True),
            task=MOCK_TASK_ARGS,
        )
        self.operator.validate_and_run_config(OmegaConf.structured(config))
        tracked_runs = self.operator.get_running_task_runs()
        self.assertEqual(len(tracked_runs), 1, "Run not launched")
        task_run_id, tracked_run = list(tracked_runs.items())[0]

        # The tracked run should carry all of its components
        self.assertIsNotNone(tracked_run)
        self.assertIsNotNone(tracked_run.task_launcher)
        self.assertIsNotNone(tracked_run.task_runner)
        self.assertIsNotNone(tracked_run.architect)
        self.assertIsNotNone(tracked_run.task_run)
        self.assertEqual(tracked_run.task_run.db_id, task_run_id)

        # Create two agents to step through the task
        architect = tracked_run.architect
        self.assertIsInstance(architect, MockArchitect,
                              "Must use mock in testing")
        # Register a worker
        mock_worker_name = "MOCK_WORKER"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id = workers[0].db_id

        self.assertEqual(len(tracked_run.task_runner.running_assignments), 0)

        # Register an agent
        mock_agent_details = "FAKE_ASSIGNMENT"
        architect.server.register_mock_agent(worker_id, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 1, "Agent was not created properly")
        agent = agents[0]
        self.assertIsNotNone(agent)

        # Register another worker
        mock_worker_name = "MOCK_WORKER_2"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id = workers[0].db_id

        # Register an agent
        mock_agent_details = "FAKE_ASSIGNMENT_2"
        architect.server.register_mock_agent(worker_id, mock_agent_details)

        # Give up to TIMEOUT_TIME seconds for whole mock task to complete
        start_time = time.time()
        while time.time() - start_time < TIMEOUT_TIME:
            if len(self.operator.get_running_task_runs()) == 0:
                break
            time.sleep(0.1)
        self.assertLess(time.time() - start_time, TIMEOUT_TIME,
                        "Task not completed in time")

        # Ensure the assignment is completed
        task_run = tracked_run.task_run
        assignment = task_run.get_assignments()[0]
        self.assertEqual(assignment.get_status(), AssignmentState.COMPLETED)

    def test_run_job_not_concurrent(self):
        """Run a single non-concurrent mock assignment through the operator to completion"""
        self.operator = Operator(self.db)
        config = MephistoConfig(
            blueprint=MockBlueprintArgs(
                num_assignments=1,
                is_concurrent=False,
            ),
            provider=MockProviderArgs(requester_name=self.requester_name, ),
            architect=MockArchitectArgs(should_run_server=True),
            task=MOCK_TASK_ARGS,
        )
        self.operator.validate_and_run_config(OmegaConf.structured(config))
        tracked_runs = self.operator.get_running_task_runs()
        self.assertEqual(len(tracked_runs), 1, "Run not launched")
        task_run_id, tracked_run = list(tracked_runs.items())[0]

        # The tracked run should carry all of its components
        self.assertIsNotNone(tracked_run)
        self.assertIsNotNone(tracked_run.task_launcher)
        self.assertIsNotNone(tracked_run.task_runner)
        self.assertIsNotNone(tracked_run.architect)
        self.assertIsNotNone(tracked_run.task_run)
        self.assertEqual(tracked_run.task_run.db_id, task_run_id)

        # Create two agents to step through the task
        architect = tracked_run.architect
        self.assertIsInstance(architect, MockArchitect,
                              "Must use mock in testing")
        # Register a worker
        mock_worker_name = "MOCK_WORKER"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id = workers[0].db_id

        self.assertEqual(len(tracked_run.task_runner.running_assignments), 0)

        # Register an agent
        mock_agent_details = "FAKE_ASSIGNMENT"
        architect.server.register_mock_agent(worker_id, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 1, "Agent was not created properly")
        agent = agents[0]
        self.assertIsNotNone(agent)

        # Register another worker
        mock_worker_name = "MOCK_WORKER_2"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id = workers[0].db_id

        # Register an agent
        mock_agent_details = "FAKE_ASSIGNMENT_2"
        architect.server.register_mock_agent(worker_id, mock_agent_details)

        # Give up to TIMEOUT_TIME seconds for both tasks to complete
        start_time = time.time()
        while time.time() - start_time < TIMEOUT_TIME:
            if len(self.operator.get_running_task_runs()) == 0:
                break
            time.sleep(0.1)
        self.assertLess(time.time() - start_time, TIMEOUT_TIME,
                        "Task not completed in time")

        # Ensure the assignment is completed
        task_run = tracked_run.task_run
        assignment = task_run.get_assignments()[0]
        self.assertEqual(assignment.get_status(), AssignmentState.COMPLETED)

    def test_run_jobs_with_restrictions(self):
        """Ensure allowed_concurrent and maximum_units_per_worker work"""
        self.operator = Operator(self.db)
        provider_args = MockProviderArgs(requester_name=self.requester_name, )
        architect_args = MockArchitectArgs(should_run_server=True)
        config = MephistoConfig(
            blueprint=MockBlueprintArgs(
                num_assignments=3,
                is_concurrent=True,
            ),
            provider=provider_args,
            architect=architect_args,
            task=TaskConfigArgs(
                task_title="title",
                task_description="This is a description",
                task_reward="0.3",
                task_tags="1,2,3",
                maximum_units_per_worker=2,
                allowed_concurrent=1,
                task_name='max-unit-test',
            ),
        )
        self.operator.validate_and_run_config(OmegaConf.structured(config))
        tracked_runs = self.operator.get_running_task_runs()
        self.assertEqual(len(tracked_runs), 1, "Run not launched")
        task_run_id, tracked_run = list(tracked_runs.items())[0]

        # The tracked run should carry all of its components
        self.assertIsNotNone(tracked_run)
        self.assertIsNotNone(tracked_run.task_launcher)
        self.assertIsNotNone(tracked_run.task_runner)
        self.assertIsNotNone(tracked_run.architect)
        self.assertIsNotNone(tracked_run.task_run)
        self.assertEqual(tracked_run.task_run.db_id, task_run_id)

        self.await_server_start(tracked_run.architect)

        # Create two agents to step through the task
        architect = tracked_run.architect
        self.assertIsInstance(architect, MockArchitect,
                              "Must use mock in testing")
        # Register a worker
        mock_worker_name = "MOCK_WORKER"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id_1 = workers[0].db_id

        self.assertEqual(len(tracked_run.task_runner.running_assignments), 0)

        # Register an agent
        mock_agent_details = "FAKE_ASSIGNMENT"
        architect.server.register_mock_agent(worker_id_1, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 1, "Agent was not created properly")
        agent = agents[0]
        self.assertIsNotNone(agent)

        # Try to register a second agent, which should fail due to concurrency
        mock_agent_details = "FAKE_ASSIGNMENT_2"
        architect.server.register_mock_agent(worker_id_1, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 1, "Second agent was created")

        # Register another worker
        mock_worker_name = "MOCK_WORKER_2"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id_2 = workers[0].db_id

        # Register an agent
        mock_agent_details = "FAKE_ASSIGNMENT_2"
        architect.server.register_mock_agent(worker_id_2, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 2, "Second agent was not created")

        # wait for task to pass
        self.wait_for_complete_assignment(
            agents[1].get_unit().get_assignment(), 3)

        # Pass a second task as well
        mock_agent_details = "FAKE_ASSIGNMENT_3"
        architect.server.register_mock_agent(worker_id_1, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 3, "Agent was not created properly")
        mock_agent_details = "FAKE_ASSIGNMENT_4"
        architect.server.register_mock_agent(worker_id_2, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 4, "Fourth agent was not created")

        # wait for task to pass
        self.wait_for_complete_assignment(
            agents[3].get_unit().get_assignment(), 3)

        # Both workers should have saturated their tasks, and not be granted agents
        mock_agent_details = "FAKE_ASSIGNMENT_5"
        architect.server.register_mock_agent(worker_id_1, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 4, "Additional agent was created")
        architect.server.register_mock_agent(worker_id_2, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 4, "Additional agent was created")

        # new workers should be able to work on these just fine though
        mock_worker_name = "MOCK_WORKER_3"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id_3 = workers[0].db_id
        mock_worker_name = "MOCK_WORKER_4"
        architect.server.register_mock_worker(mock_worker_name)
        workers = self.db.find_workers(worker_name=mock_worker_name)
        worker_id_4 = workers[0].db_id

        # Register agents from new workers
        mock_agent_details = "FAKE_ASSIGNMENT_5"
        architect.server.register_mock_agent(worker_id_3, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 5, "Additional agent was not created")
        mock_agent_details = "FAKE_ASSIGNMENT_6"
        architect.server.register_mock_agent(worker_id_4, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 6, "Additional agent was not created")

        # wait for task to pass
        self.wait_for_complete_assignment(
            agents[5].get_unit().get_assignment(), 3)

        # Give up to TIMEOUT_TIME seconds for whole mock task to complete
        start_time = time.time()
        while time.time() - start_time < TIMEOUT_TIME:
            if len(self.operator.get_running_task_runs()) == 0:
                break
            time.sleep(0.1)
        self.assertLess(time.time() - start_time, TIMEOUT_TIME,
                        "Task not completed in time")

        # Ensure all assignments are completed
        task_run = tracked_run.task_run
        assignments = task_run.get_assignments()
        for assignment in assignments:
            self.assertEqual(assignment.get_status(),
                             AssignmentState.COMPLETED)

        # Create a new task
        config = MephistoConfig(
            blueprint=MockBlueprintArgs(
                num_assignments=1,
                is_concurrent=True,
            ),
            provider=MockProviderArgs(requester_name=self.requester_name, ),
            architect=MockArchitectArgs(should_run_server=True),
            task=TaskConfigArgs(
                task_title="title",
                task_description="This is a description",
                task_reward="0.3",
                task_tags="1,2,3",
                maximum_units_per_worker=2,
                allowed_concurrent=1,
                task_name='max-unit-test',
            ),
        )
        self.operator.validate_and_run_config(OmegaConf.structured(config))
        tracked_runs = self.operator.get_running_task_runs()
        self.assertEqual(len(tracked_runs), 1, "Run not launched")
        task_run_id, tracked_run = list(tracked_runs.items())[0]
        self.await_server_start(tracked_run.architect)
        architect = tracked_run.architect

        # Workers one and two still shouldn't be able to make agents
        mock_agent_details = "FAKE_ASSIGNMENT_7"
        architect.server.register_mock_agent(worker_id_1, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(
            len(agents),
            6,
            "Additional agent was created for worker exceeding max units",
        )
        mock_agent_details = "FAKE_ASSIGNMENT_7"
        architect.server.register_mock_agent(worker_id_2, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(
            len(agents),
            6,
            "Additional agent was created for worker exceeding max units",
        )

        # Three and four should though
        mock_agent_details = "FAKE_ASSIGNMENT_7"
        architect.server.register_mock_agent(worker_id_3, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 7, "Additional agent was not created")
        mock_agent_details = "FAKE_ASSIGNMENT_8"
        architect.server.register_mock_agent(worker_id_4, mock_agent_details)
        agents = self.db.find_agents()
        self.assertEqual(len(agents), 8, "Additional agent was not created")

        # Ensure the task run completed and that all assignments are done
        start_time = time.time()
        while time.time() - start_time < TIMEOUT_TIME:
            if len(self.operator.get_running_task_runs()) == 0:
                break
            time.sleep(0.1)
        self.assertLess(time.time() - start_time, TIMEOUT_TIME,
                        "Task not completed in time")
        task_run = tracked_run.task_run
        assignments = task_run.get_assignments()
        for assignment in assignments:
            self.assertEqual(assignment.get_status(),
                             AssignmentState.COMPLETED)
def setUp(self):
    """Create a fresh temporary directory and open a LocalMephistoDB inside it."""
    scratch_dir = tempfile.mkdtemp()
    self.data_dir = scratch_dir
    # The database file lives inside the temporary directory created above
    self.db = LocalMephistoDB(os.path.join(scratch_dir, "mephisto.db"))
class BlueprintTests(unittest.TestCase):
    """
    This class contains the basic data model tests that should
    be passable for a blueprint. Runs the tests on the Blueprint,
    which is the entry point for the components that run the task.

    Concrete suites subclass this, set ``BlueprintClass`` and implement the
    helper methods below that raise NotImplementedError here.
    """

    BlueprintClass: ClassVar[Type[Blueprint]]
    db: LocalMephistoDB
    data_dir: str
    build_dir: str

    @classmethod
    def setUpClass(cls):
        """
        Only run tests on subclasses of this class, as this class is just defining the
        testing interface and the tests to run any class that implements the TaskRunner
        interface
        """
        if cls is BlueprintTests:
            raise unittest.SkipTest("Skip BlueprintTests tests, it's a base class")
        super().setUpClass()

    # Implementations of this test suite should implement the following methods.
    # See the mock blueprint for examples

    def task_is_built(self, build_dir) -> bool:
        """Ensure that a properly built version of this task is present in this dir"""
        raise NotImplementedError()

    def assignment_completed_successfully(self, assignment: Assignment) -> bool:
        """Validate that an assignment is able to be run successfully"""
        raise NotImplementedError()

    def get_test_assignment(self) -> Assignment:
        """Create a test assignment for self.task_run using mock agents"""
        raise NotImplementedError()

    def assignment_is_tracked(
        self, task_runner: TaskRunner, assignment: Assignment
    ) -> bool:
        """
        Return whether or not this task is currently being tracked (run)
        by the given task runner. This should be false unless
        run_assignment is still ongoing for a task.
        """
        raise NotImplementedError()

    # Test suite methods

    def setUp(self) -> None:
        """
        Setup should put together any requirements for starting the database for a test.
        """
        self.data_dir = tempfile.mkdtemp()
        self.build_dir = tempfile.mkdtemp()
        database_path = os.path.join(self.data_dir, "mephisto.db")
        self.db = LocalMephistoDB(database_path)
        # TODO(#97) we need to actually pull the task type from the Blueprint
        self.task_run = TaskRun(self.db, get_test_task_run(self.db))
        # TODO(#97) create a mock agent with the given task type?
        self.TaskRunnerClass = self.BlueprintClass.TaskRunnerClass
        self.AgentStateClass = self.BlueprintClass.AgentStateClass
        self.TaskBuilderClass = self.BlueprintClass.TaskBuilderClass

    def tearDown(self) -> None:
        """
        tearDown should clear up anything that was set up or used in any of the tests
        in this class.

        Generally this means cleaning up the database that was set up.
        """
        self.db.shutdown()
        shutil.rmtree(self.data_dir)
        # setUp also creates build_dir, so it has to be removed here as well.
        # ignore_errors keeps teardown from failing if a test already removed it.
        shutil.rmtree(self.build_dir, ignore_errors=True)

    def _get_init_task_runner(self) -> TaskRunner:
        """Get an initialized task runner of TaskRunnerClass"""
        args = self.BlueprintClass.ArgsClass()
        config = OmegaConf.structured(MephistoConfig(blueprint=args))
        shared_state = self.BlueprintClass.SharedStateClass()
        return self.TaskRunnerClass(self.task_run, config, shared_state)

    def _get_init_task_builder(self) -> TaskBuilder:
        """Get an initialized task builder of TaskBuilderClass"""
        args = self.BlueprintClass.ArgsClass()
        config = OmegaConf.structured(MephistoConfig(blueprint=args))
        return self.TaskBuilderClass(self.task_run, config)

    def test_options(self) -> None:
        """Test the default options, and try to break the initialization"""
        # TODO(#94?) implement with options implementations
        pass

    def test_has_required_class_members(self) -> None:
        """Ensures that the BluePrint is well-formatted"""
        self.assertTrue(
            issubclass(self.AgentStateClass, AgentState),
            "Implemented AgentStateClass does not extend AgentState",
        )
        self.assertNotEqual(
            self.AgentStateClass,
            AgentState,
            "Can not use base AgentState in a Blueprint implementation",
        )
        self.assertTrue(
            issubclass(self.TaskRunnerClass, TaskRunner),
            "Implemented TaskRunnerClass does not extend TaskRunner",
        )
        self.assertNotEqual(
            self.TaskRunnerClass,
            TaskRunner,
            "Can not use base TaskRunner in a Blueprint implementation",
        )
        self.assertTrue(
            issubclass(self.TaskBuilderClass, TaskBuilder),
            "Implemented TaskBuilderClass does not extend TaskBuilder",
        )
        self.assertNotEqual(
            self.TaskBuilderClass,
            TaskBuilder,
            "Can not use base TaskBuilder in a Blueprint implementation",
        )
        self.assertIn(
            "mock",
            self.BlueprintClass.supported_architects,
            "Must support at least the mock architecture for testing",
        )
        # TODO(#94?) implement getting the defaults of TaskRunnerClass.get_extra_options() when
        # options are improved

    def test_abstract_initialization_works(self) -> None:
        """
        Test that initialization from the abstract class produces the
        correct class.
        """
        args = self.BlueprintClass.ArgsClass()
        config = OmegaConf.structured(MephistoConfig(blueprint=args))
        shared_state = self.BlueprintClass.SharedStateClass()
        runner = TaskRunner(self.task_run, config, shared_state)  # type: ignore
        self.assertIsInstance(runner, self.TaskRunnerClass)
        builder = TaskBuilder(self.task_run, config)  # type: ignore
        self.assertIsInstance(builder, self.TaskBuilderClass)

    def test_can_init_subclasses(self) -> None:
        """Ensure the subclasses of a Blueprint can be properly initialized"""
        # Only construction is under test here, so the instances are discarded
        self._get_init_task_runner()
        self._get_init_task_builder()
        # TODO(#97) uncomment after creating a mock agent as part of this test
        # agent_state = self.AgentStateClass(self.agent)

    def test_can_build_task(self) -> None:
        """Ensure a task can be built up from scratch in the given directory"""
        task_builder = self._get_init_task_builder()
        task_builder.build_in_dir(self.build_dir)
        self.assertTrue(self.task_is_built(self.build_dir))

    def test_can_run_task(self) -> None:
        """Ensure that a task can be run to completion in the basic case"""
        task_runner = self._get_init_task_runner()
        assignment = self.get_test_assignment()
        agents: List["Agent"] = [
            cast("Agent", u.get_assigned_agent()) for u in assignment.get_units()
        ]
        task_runner.launch_assignment(assignment, agents)
        self.assertTrue(self.assignment_completed_successfully(assignment))

    def test_can_exit_gracefully(self) -> None:
        """Ensure that a task can be run to completion when an agent disconnects"""
        task_runner = self._get_init_task_runner()
        assignment = self.get_test_assignment()
        fail_agent = assignment.get_units()[0].get_assigned_agent()
        # Plain asserts (rather than self.assert*) so type checkers narrow fail_agent
        assert fail_agent is not None, "No agent set for first unit of test assignment"
        assert isinstance(fail_agent, MockAgent), "Agent must be mock agent for testing"
        fail_agent.mark_disconnected()
        try:
            task_runner.launch_assignment(assignment, [fail_agent])
        except Exception:
            # If launching raised, explicitly clean up the assignment before
            # checking the runner's state below
            task_runner.cleanup_assignment(assignment)

        self.assertFalse(self.assignment_completed_successfully(assignment))
        self.assertFalse(self.assignment_is_tracked(task_runner, assignment))

    def test_run_tracked(self) -> None:
        """Run a task in a thread, ensure we see it is being tracked"""
        task_runner = self._get_init_task_runner()
        assignment = self.get_test_assignment()
        agents: List["Agent"] = [
            cast("Agent", u.get_assigned_agent()) for u in assignment.get_units()
        ]
        task_thread = threading.Thread(
            target=task_runner.launch_assignment, args=(assignment, agents)
        )
        self.assertFalse(self.assignment_is_tracked(task_runner, assignment))
        task_thread.start()
        time.sleep(0.1)  # Sleep to give the task_runner time to register
        self.assertTrue(self.assignment_is_tracked(task_runner, assignment))
        task_thread.join()
        self.assertFalse(self.assignment_is_tracked(task_runner, assignment))
        self.assertTrue(self.assignment_completed_successfully(assignment))
class ArchitectTests(unittest.TestCase):
    """
    This class contains the basic data model tests that should
    be passable for an architect.

    Concrete suites subclass this, set ``ArchitectClass`` and implement the
    ``server_is_*`` helpers below that raise NotImplementedError here.
    """

    ArchitectClass: Type[Architect]
    db: MephistoDB
    data_dir: str
    build_dir: str

    # Shared flag so the account notice in setUp is only printed once
    warned_about_setup = False

    # Implementations of this test suite should implement the following methods.
    # See the mock architect for examples

    def server_is_prepared(self, build_dir: str) -> bool:
        """Ensure that the server is ready to be deployed from the given directory"""
        raise NotImplementedError()

    def server_is_cleaned(self, build_dir: str) -> bool:
        """Validate that server files are cleaned up following a deploy"""
        raise NotImplementedError()

    def server_is_shutdown(self) -> bool:
        """Validate that server is no longer running, ie shutdown successfully called"""
        raise NotImplementedError()

    def server_is_up(self, url: str, timeout: float = 5.0) -> bool:
        """
        Ping the url to see if anything is running.

        Sends a GET to ``<url>/is_alive`` and returns True only for a 200 response.
        A refused connection, or no answer within ``timeout`` seconds, counts
        as the server being down.
        """
        if url.endswith("/"):
            url = url[:-1]
        alive_url = url + "/is_alive"
        try:
            # Without a timeout an unresponsive server would hang the test run
            response = requests.get(alive_url, timeout=timeout)
        except (requests.ConnectionError, requests.Timeout):
            return False
        return response.status_code == 200

    @classmethod
    def setUpClass(cls):
        """
        Only run tests on subclasses of this class, as this class is just defining the
        testing interface and the tests to run on an architect that adheres to the interface
        """
        if cls is ArchitectTests:
            raise unittest.SkipTest("Skip ArchitectTests tests, it's a base class")
        super().setUpClass()

    def setUp(self) -> None:
        """
        Setup should put together any requirements for starting the database for a test.

        Skips the test when the subclass has not set ArchitectClass.
        """
        if not hasattr(self, "ArchitectClass"):
            raise unittest.SkipTest("Skipping test as no ArchitectClass set")
        if not self.warned_about_setup:
            print(
                "Architect tests may require using an account with the server provider "
                "in order to function properly. Make sure these are configured before testing."
            )
            # Set on the class, not the instance: unittest builds a fresh instance
            # per test method, so an instance flag would never suppress the notice
            ArchitectTests.warned_about_setup = True
        self.data_dir = tempfile.mkdtemp()
        database_path = os.path.join(self.data_dir, "mephisto.db")
        self.db = LocalMephistoDB(database_path)
        self.build_dir = tempfile.mkdtemp()
        self.task_run = TaskRun(self.db, get_test_task_run(self.db))
        builder = MockTaskBuilder(self.task_run, {})
        builder.build_in_dir(self.build_dir)

    def tearDown(self) -> None:
        """
        tearDown should clear up anything that was set up or used in any of the tests
        in this class.

        Generally this means cleaning up the database that was set up.
        """
        self.db.shutdown()
        shutil.rmtree(self.data_dir)
        shutil.rmtree(self.build_dir)

    def test_init_architect(self) -> None:
        """Simple test to ensure that an architect can be initialized with default
        arguments, and that it is the correct class
        """
        self.assertTrue(
            issubclass(self.ArchitectClass, Architect),
            "Implemented ArchitectClass does not extend Architect",
        )
        self.assertNotEqual(
            self.ArchitectClass, Architect, "Can not use base Architect"
        )
        opts = get_default_arg_dict(self.ArchitectClass)
        architect = self.ArchitectClass(self.db, opts, self.task_run, self.build_dir)
        self.assertIsInstance(architect, self.ArchitectClass)

    def get_architect(self) -> Architect:
        """
        Return an initialized architect to use in testing. Can be overridden if
        special parameters need to be set to run tests properly.
        """
        opts = get_default_arg_dict(self.ArchitectClass)
        architect = self.ArchitectClass(self.db, opts, self.task_run, self.build_dir)
        return architect

    def test_prepare_cleanup(self) -> None:
        """Test preparation and cleanup for server"""
        architect = self.get_architect()
        built_dir = architect.prepare()
        self.assertTrue(os.path.exists(built_dir))
        self.assertTrue(self.server_is_prepared(self.build_dir))
        architect.cleanup()
        self.assertTrue(self.server_is_cleaned(self.build_dir))

    def test_deploy_shutdown(self) -> None:
        """Test deploying the server, and shutting it down"""
        architect = self.get_architect()
        architect.prepare()
        self.assertTrue(self.server_is_prepared(self.build_dir))
        server_url = architect.deploy()
        self.assertTrue(self.server_is_up(server_url))
        architect.cleanup()
        self.assertTrue(self.server_is_cleaned(self.build_dir))
        architect.shutdown()
        self.assertFalse(self.server_is_up(server_url))
        self.assertTrue(self.server_is_shutdown())
def test_base_task(self):
    """
    Launch the example task on the mock architect/provider, push a single
    mock agent through it, and check the inputs and outputs stored for that agent.
    """

    # Build the config; the conf directory is given relative to this file's directory
    conf_dir = os.path.join(
        os.path.relpath(TASK_DIRECTORY, os.path.dirname(__file__)), 'conf'
    )
    with initialize(config_path=conf_dir):
        hydra_overrides = [
            f'+mephisto.blueprint._blueprint_type={AcuteEvalBlueprint.BLUEPRINT_TYPE}',
            '+mephisto/architect=mock',
            '+mephisto/provider=mock',
            f'mephisto.blueprint.block_on_onboarding_fail={False}',
            f'+task_dir={TASK_DIRECTORY}',
            f'+current_time={int(time.time())}',
        ]
        self.config = compose(config_name="example", overrides=hydra_overrides)
        # TODO: when Hydra 1.1 is released with support for recursive defaults,
        #  don't manually specify all missing blueprint args anymore, but
        #  instead define the blueprint in the defaults list directly.
        #  Currently, the blueprint can't be set in the defaults list without
        #  overriding params in the YAML file, as documented at
        #  https://github.com/facebookresearch/hydra/issues/326 and as fixed in
        #  https://github.com/facebookresearch/hydra/pull/1044.

    # Fresh database in a temporary directory, then launch the run
    self.data_dir = tempfile.mkdtemp()
    self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))
    self.config = augment_config_from_db(self.config, self.db)
    self.config.mephisto.architect.should_run_server = True

    self.operator = Operator(self.db)
    self.operator.validate_and_run_config(self.config.mephisto, shared_state=None)
    first_channel = list(self.operator.supervisor.channels.values())[0]
    mock_server = first_channel.job.architect.server

    # Register a worker and look up its database id
    worker_name = "MOCK_WORKER"
    mock_server.register_mock_worker(worker_name)
    worker_id = self.db.find_workers(worker_name=worker_name)[0].db_id

    # Register an agent for that worker
    mock_server.register_mock_agent(worker_id, "FAKE_ASSIGNMENT")
    agent_id_1 = self.db.find_agents()[0].db_id

    # Set initial data, then have the agent submit its act
    mock_server.request_init_data(agent_id_1)
    submission = {"MEPHISTO_is_submit": True, "task_data": DESIRED_OUTPUTS}
    mock_server.send_agent_act(agent_id_1, submission)

    # Re-fetch the agent and check that the inputs and outputs are as expected
    stored_state = self.db.find_agents()[0].state.get_data()
    self.assertEqual(DESIRED_INPUTS, stored_state['inputs'])
    self.assertEqual(DESIRED_OUTPUTS, stored_state['outputs'])
class TestSupervisor(unittest.TestCase): """ Unit testing for the Mephisto Supervisor, uses WebsocketChannel and MockArchitect """ def setUp(self): self.data_dir = tempfile.mkdtemp() database_path = os.path.join(self.data_dir, "mephisto.db") self.db = LocalMephistoDB(database_path) self.task_id = self.db.new_task("test_mock", MockBlueprint.BLUEPRINT_TYPE) self.task_run_id = get_test_task_run(self.db) self.task_run = TaskRun(self.db, self.task_run_id) architect_config = OmegaConf.structured( MephistoConfig( architect=MockArchitectArgs(should_run_server=True), )) self.architect = MockArchitect(self.db, architect_config, EMPTY_STATE, self.task_run, self.data_dir) self.architect.prepare() self.architect.deploy() self.urls = self.architect._get_socket_urls() # FIXME self.url = self.urls[0] self.provider = MockProvider(self.db) self.provider.setup_resources_for_task_run(self.task_run, self.task_run.args, self.url) self.launcher = TaskLauncher(self.db, self.task_run, self.get_mock_assignment_data_array()) self.launcher.create_assignments() self.launcher.launch_units(self.url) self.sup = None def tearDown(self): if self.sup is not None: self.sup.shutdown() self.launcher.expire_units() self.architect.cleanup() self.architect.shutdown() self.db.shutdown() shutil.rmtree(self.data_dir) def get_mock_assignment_data_array(self) -> List[InitializationData]: mock_data = MockTaskRunner.get_mock_assignment_data() return [mock_data, mock_data] def test_initialize_supervisor(self): """Ensure that the supervisor object can even be created""" sup = Supervisor(self.db) self.assertIsNotNone(sup) self.assertDictEqual(sup.agents, {}) self.assertDictEqual(sup.channels, {}) sup.shutdown() def test_channel_operations(self): """ Initialize a channel, and ensure the basic startup and shutdown functions are working """ sup = Supervisor(self.db) self.sup = sup TaskRunnerClass = MockBlueprint.TaskRunnerClass args = MockBlueprint.ArgsClass() config = 
OmegaConf.structured(MephistoConfig(blueprint=args)) task_runner = TaskRunnerClass(self.task_run, config, EMPTY_STATE) test_job = Job( architect=self.architect, task_runner=task_runner, provider=self.provider, qualifications=[], registered_channel_ids=[], ) channels = self.architect.get_channels(sup._on_channel_open, sup._on_catastrophic_disconnect, sup._on_message) channel = channels[0] channel.open() channel_id = channel.channel_id self.assertIsNotNone(channel_id) channel.close() self.assertTrue(channel.is_closed()) def test_register_concurrent_job(self): """Test registering and running a job that requires multiple workers""" # Handle baseline setup sup = Supervisor(self.db) self.sup = sup TaskRunnerClass = MockBlueprint.TaskRunnerClass args = MockBlueprint.ArgsClass() args.timeout_time = 5 args.is_concurrent = False config = OmegaConf.structured(MephistoConfig(blueprint=args)) task_runner = TaskRunnerClass(self.task_run, config, EMPTY_STATE) sup.register_job(self.architect, task_runner, self.provider) self.assertEqual(len(sup.channels), 1) channel_info = list(sup.channels.values())[0] self.assertIsNotNone(channel_info) self.assertTrue(channel_info.channel.is_alive) channel_id = channel_info.channel_id task_runner = channel_info.job.task_runner self.assertIsNotNone(channel_id) self.assertEqual( len(self.architect.server.subs), 1, "MockServer doesn't see registered channel", ) self.assertIsNotNone( self.architect.server.last_alive_packet, "No alive packet received by server", ) sup.launch_sending_thread() self.assertIsNotNone(sup.sending_thread) # Register a worker mock_worker_name = "MOCK_WORKER" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker not successfully registered") worker = workers[0] self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, 
"Worker potentially re-registered") worker_id = workers[0].db_id self.assertEqual(len(task_runner.running_assignments), 0) # Register an agent mock_agent_details = "FAKE_ASSIGNMENT" self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent was not created properly") self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent may have been duplicated") agent = agents[0] self.assertIsNotNone(agent) self.assertEqual(len(sup.agents), 1, "Agent not registered with supervisor") self.assertEqual(len(task_runner.running_units), 1, "Ready task was not launched") # Register another worker mock_worker_name = "MOCK_WORKER_2" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) worker_id = workers[0].db_id # Register an agent mock_agent_details = "FAKE_ASSIGNMENT_2" self.architect.server.register_mock_agent(worker_id, mock_agent_details) self.assertEqual(len(task_runner.running_units), 2, "Tasks were not launched") agents = [a.agent for a in sup.agents.values()] # Make both agents act agent_id_1, agent_id_2 = agents[0].db_id, agents[1].db_id agent_1_data = agents[0].datastore.agent_data[agent_id_1] agent_2_data = agents[1].datastore.agent_data[agent_id_2] self.architect.server.send_agent_act(agent_id_1, {"text": "message1"}) self.architect.server.send_agent_act(agent_id_2, {"text": "message2"}) # Give up to 1 seconds for the actual operations to occur start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(agent_1_data["acts"]) > 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not process messages in time") # Give up to 1 seconds for the task to complete afterwards start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if 
len(task_runner.running_units) == 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not complete task in time") # Give up to 1 seconds for all messages to propogate start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if self.architect.server.actions_observed == 2: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Not all actions observed in time") sup.shutdown() self.assertTrue(channel_info.channel.is_closed) def test_register_job(self): """Test registering and running a job run asynchronously""" # Handle baseline setup sup = Supervisor(self.db) self.sup = sup TaskRunnerClass = MockBlueprint.TaskRunnerClass args = MockBlueprint.ArgsClass() args.timeout_time = 5 config = OmegaConf.structured(MephistoConfig(blueprint=args)) task_runner = TaskRunnerClass(self.task_run, config, EMPTY_STATE) sup.register_job(self.architect, task_runner, self.provider) self.assertEqual(len(sup.channels), 1) channel_info = list(sup.channels.values())[0] self.assertIsNotNone(channel_info) self.assertTrue(channel_info.channel.is_alive()) channel_id = channel_info.channel_id task_runner = channel_info.job.task_runner self.assertIsNotNone(channel_id) self.assertEqual( len(self.architect.server.subs), 1, "MockServer doesn't see registered channel", ) self.assertIsNotNone( self.architect.server.last_alive_packet, "No alive packet received by server", ) sup.launch_sending_thread() self.assertIsNotNone(sup.sending_thread) # Register a worker mock_worker_name = "MOCK_WORKER" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker not successfully registered") worker = workers[0] self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker potentially re-registered") worker_id = 
workers[0].db_id self.assertEqual(len(task_runner.running_assignments), 0) # Register an agent mock_agent_details = "FAKE_ASSIGNMENT" self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent was not created properly") self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent may have been duplicated") agent = agents[0] self.assertIsNotNone(agent) self.assertEqual(len(sup.agents), 1, "Agent not registered with supervisor") self.assertEqual(len(task_runner.running_assignments), 0, "Task was not yet ready") # Register another worker mock_worker_name = "MOCK_WORKER_2" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) worker_id = workers[0].db_id # Register an agent mock_agent_details = "FAKE_ASSIGNMENT_2" self.architect.server.register_mock_agent(worker_id, mock_agent_details) self.assertEqual(len(task_runner.running_assignments), 1, "Task was not launched") agents = [a.agent for a in sup.agents.values()] # Make both agents act agent_id_1, agent_id_2 = agents[0].db_id, agents[1].db_id agent_1_data = agents[0].datastore.agent_data[agent_id_1] agent_2_data = agents[1].datastore.agent_data[agent_id_2] self.architect.server.send_agent_act(agent_id_1, {"text": "message1"}) self.architect.server.send_agent_act(agent_id_2, {"text": "message2"}) # Give up to 1 seconds for the actual operation to occur start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(agent_1_data["acts"]) > 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not process messages in time") # Give up to 1 seconds for the task to complete afterwards start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(task_runner.running_assignments) == 0: break 
time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not complete task in time") # Give up to 1 seconds for all messages to propogate start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if self.architect.server.actions_observed == 2: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Not all actions observed in time") sup.shutdown() self.assertTrue(channel_info.channel.is_closed()) def test_register_concurrent_job_with_onboarding(self): """Test registering and running a job with onboarding""" # Handle baseline setup sup = Supervisor(self.db) self.sup = sup TEST_QUALIFICATION_NAME = "test_onboarding_qualification" task_run_args = self.task_run.args task_run_args.blueprint.use_onboarding = True task_run_args.blueprint.onboarding_qualification = TEST_QUALIFICATION_NAME task_run_args.blueprint.timeout_time = 5 task_run_args.blueprint.is_concurrent = True self.task_run.get_task_config() # Supervisor expects that blueprint setup has already occurred blueprint = self.task_run.get_blueprint() TaskRunnerClass = MockBlueprint.TaskRunnerClass task_runner = TaskRunnerClass(self.task_run, task_run_args, EMPTY_STATE) sup.register_job(self.architect, task_runner, self.provider) self.assertEqual(len(sup.channels), 1) channel_info = list(sup.channels.values())[0] self.assertIsNotNone(channel_info) self.assertTrue(channel_info.channel.is_alive()) channel_id = channel_info.channel_id task_runner = channel_info.job.task_runner self.assertIsNotNone(channel_id) self.assertEqual( len(self.architect.server.subs), 1, "MockServer doesn't see registered channel", ) self.assertIsNotNone( self.architect.server.last_alive_packet, "No alive packet received by server", ) sup.launch_sending_thread() self.assertIsNotNone(sup.sending_thread) self.assertEqual(len(task_runner.running_units), 0) # Fail to register an agent who fails onboarding mock_worker_name = "BAD_WORKER" 
self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker not successfully registered") worker_0 = workers[0] self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker potentially re-registered") worker_id = workers[0].db_id mock_agent_details = "FAKE_ASSIGNMENT" self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Agent should not be created yet - need onboarding") onboard_agents = self.db.find_onboarding_agents() self.assertEqual(len(onboard_agents), 1, "Onboarding agent should have been created") time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertIn("onboard_data", last_packet["data"], "Onboarding not triggered") self.architect.server.last_packet = None # Submit onboarding from the agent onboard_data = {"should_pass": False} self.architect.server.register_mock_agent_after_onboarding( worker_id, onboard_agents[0].get_agent_id(), onboard_data) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Failed agent created after onboarding") # Re-register as if refreshing self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Failed agent created after onboarding") self.assertEqual(len(sup.agents), 0, "Failed agent registered with supervisor") self.assertEqual( len(task_runner.running_units), 0, "Task should not launch with failed worker", ) # Register a worker mock_worker_name = "MOCK_WORKER" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker not successfully registered") worker_1 = workers[0] 
self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker potentially re-registered") worker_id = workers[0].db_id self.assertEqual(len(task_runner.running_assignments), 0) # Fail to register a blocked agent mock_agent_details = "FAKE_ASSIGNMENT" qualification_id = blueprint.onboarding_qualification_id self.db.grant_qualification(qualification_id, worker_1.db_id, 0) self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Agent should not be created yet, failed onboarding") time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertNotIn( "onboard_data", last_packet["data"], "Onboarding triggered for disqualified worker", ) self.assertIsNone(last_packet["data"]["agent_id"], "worker assigned real agent id") self.architect.server.last_packet = None self.db.revoke_qualification(qualification_id, worker_id) # Register an onboarding agent successfully mock_agent_details = "FAKE_ASSIGNMENT" self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Agent should not be created yet - need onboarding") onboard_agents = self.db.find_onboarding_agents() self.assertEqual(len(onboard_agents), 2, "Onboarding agent should have been created") time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertIn("onboard_data", last_packet["data"], "Onboarding not triggered") self.architect.server.last_packet = None # Submit onboarding from the agent onboard_data = {"should_pass": True} self.architect.server.register_mock_agent_after_onboarding( worker_id, onboard_agents[1].get_agent_id(), onboard_data) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent not created after onboarding") # Re-register as if 
refreshing self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent may have been duplicated") agent = agents[0] self.assertIsNotNone(agent) self.assertEqual(len(sup.agents), 1, "Agent not registered with supervisor") self.assertEqual( len(task_runner.running_assignments), 0, "Task was not yet ready, should not launch", ) # Register another worker mock_worker_name = "MOCK_WORKER_2" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) worker_2 = workers[0] worker_id = worker_2.db_id # Register an agent that is already qualified mock_agent_details = "FAKE_ASSIGNMENT_2" self.db.grant_qualification(qualification_id, worker_2.db_id, 1) self.architect.server.register_mock_agent(worker_id, mock_agent_details) time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertNotIn( "onboard_data", last_packet["data"], "Onboarding triggered for qualified agent", ) agents = self.db.find_agents() self.assertEqual(len(agents), 2, "Second agent not created without onboarding") self.assertEqual(len(task_runner.running_assignments), 1, "Task was not launched") self.assertFalse(worker_0.is_qualified(TEST_QUALIFICATION_NAME)) self.assertTrue(worker_0.is_disqualified(TEST_QUALIFICATION_NAME)) self.assertTrue(worker_1.is_qualified(TEST_QUALIFICATION_NAME)) self.assertFalse(worker_1.is_disqualified(TEST_QUALIFICATION_NAME)) self.assertTrue(worker_2.is_qualified(TEST_QUALIFICATION_NAME)) self.assertFalse(worker_2.is_disqualified(TEST_QUALIFICATION_NAME)) agents = [a.agent for a in sup.agents.values()] # Make both agents act agent_id_1, agent_id_2 = agents[0].db_id, agents[1].db_id agent_1_data = agents[0].datastore.agent_data[agent_id_1] agent_2_data = agents[1].datastore.agent_data[agent_id_2] self.architect.server.send_agent_act(agent_id_1, {"text": "message1"}) 
self.architect.server.send_agent_act(agent_id_2, {"text": "message2"}) # Give up to 1 seconds for the actual operation to occur start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(agent_1_data["acts"]) > 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not process messages in time") # Give up to 1 seconds for the task to complete afterwards start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(task_runner.running_assignments) == 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not complete task in time") # Give up to 1 seconds for all messages to propogate start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if self.architect.server.actions_observed == 2: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Not all actions observed in time") sup.shutdown() self.assertTrue(channel_info.channel.is_closed()) def test_register_job_with_onboarding(self): """Test registering and running a job with onboarding""" # Handle baseline setup sup = Supervisor(self.db) self.sup = sup TEST_QUALIFICATION_NAME = "test_onboarding_qualification" # Register onboarding arguments for blueprint task_run_args = self.task_run.args task_run_args.blueprint.use_onboarding = True task_run_args.blueprint.onboarding_qualification = TEST_QUALIFICATION_NAME task_run_args.blueprint.timeout_time = 5 task_run_args.blueprint.is_concurrent = False self.task_run.get_task_config() # Supervisor expects that blueprint setup has already occurred blueprint = self.task_run.get_blueprint() TaskRunnerClass = MockBlueprint.TaskRunnerClass task_runner = TaskRunnerClass(self.task_run, task_run_args, EMPTY_STATE) sup.register_job(self.architect, task_runner, self.provider) self.assertEqual(len(sup.channels), 1) channel_info = list(sup.channels.values())[0] self.assertIsNotNone(channel_info) 
self.assertTrue(channel_info.channel.is_alive()) channel_id = channel_info.channel_id task_runner = channel_info.job.task_runner self.assertIsNotNone(channel_id) self.assertEqual( len(self.architect.server.subs), 1, "MockServer doesn't see registered channel", ) self.assertIsNotNone( self.architect.server.last_alive_packet, "No alive packet received by server", ) sup.launch_sending_thread() self.assertIsNotNone(sup.sending_thread) # Register a worker mock_worker_name = "MOCK_WORKER" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker not successfully registered") worker_1 = workers[0] self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) self.assertEqual(len(workers), 1, "Worker potentially re-registered") worker_id = workers[0].db_id self.assertEqual(len(task_runner.running_units), 0) # Fail to register a blocked agent mock_agent_details = "FAKE_ASSIGNMENT" qualification_id = blueprint.onboarding_qualification_id self.db.grant_qualification(qualification_id, worker_1.db_id, 0) self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Agent should not be created yet, failed onboarding") time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertNotIn( "onboard_data", last_packet["data"], "Onboarding triggered for disqualified worker", ) self.assertIsNone(last_packet["data"]["agent_id"], "worker assigned real agent id") self.architect.server.last_packet = None self.db.revoke_qualification(qualification_id, worker_id) # Register an agent successfully mock_agent_details = "FAKE_ASSIGNMENT" self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Agent should not be created yet - need 
onboarding") onboard_agents = self.db.find_onboarding_agents() self.assertEqual(len(onboard_agents), 1, "Onboarding agent should have been created") time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertIn("onboard_data", last_packet["data"], "Onboarding not triggered") self.architect.server.last_packet = None # Submit onboarding from the agent onboard_data = {"should_pass": False} self.architect.server.register_mock_agent_after_onboarding( worker_id, onboard_agents[0].get_agent_id(), onboard_data) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Failed agent created after onboarding") # Re-register as if refreshing self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 0, "Failed agent created after onboarding") self.assertEqual(len(sup.agents), 0, "Failed agent registered with supervisor") self.assertEqual( len(task_runner.running_units), 0, "Task should not launch with failed worker", ) # Register another worker mock_worker_name = "MOCK_WORKER_2" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) worker_2 = workers[0] worker_id = worker_2.db_id # Register an agent that is already qualified mock_agent_details = "FAKE_ASSIGNMENT_2" self.db.grant_qualification(qualification_id, worker_2.db_id, 1) self.architect.server.register_mock_agent(worker_id, mock_agent_details) time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertNotIn( "onboard_data", last_packet["data"], "Onboarding triggered for qualified agent", ) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Second agent not created without onboarding") self.assertEqual(len(task_runner.running_units), 1, "Tasks were not launched") self.assertFalse(worker_1.is_qualified(TEST_QUALIFICATION_NAME)) 
self.assertTrue(worker_1.is_disqualified(TEST_QUALIFICATION_NAME)) self.assertTrue(worker_2.is_qualified(TEST_QUALIFICATION_NAME)) self.assertFalse(worker_2.is_disqualified(TEST_QUALIFICATION_NAME)) # Register another worker mock_worker_name = "MOCK_WORKER_3" self.architect.server.register_mock_worker(mock_worker_name) workers = self.db.find_workers(worker_name=mock_worker_name) worker_3 = workers[0] worker_id = worker_3.db_id mock_agent_details = "FAKE_ASSIGNMENT_3" self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 1, "Agent should not be created yet - need onboarding") onboard_agents = self.db.find_onboarding_agents() self.assertEqual(len(onboard_agents), 2, "Onboarding agent should have been created") time.sleep(0.1) last_packet = self.architect.server.last_packet self.assertIsNotNone(last_packet) self.assertIn("onboard_data", last_packet["data"], "Onboarding not triggered") self.architect.server.last_packet = None # Submit onboarding from the agent onboard_data = {"should_pass": True} self.architect.server.register_mock_agent_after_onboarding( worker_id, onboard_agents[1].get_agent_id(), onboard_data) agents = self.db.find_agents() self.assertEqual(len(agents), 2, "Agent not created after onboarding") # Re-register as if refreshing self.architect.server.register_mock_agent(worker_id, mock_agent_details) agents = self.db.find_agents() self.assertEqual(len(agents), 2, "Duplicate agent created after onboarding") agent = agents[1] self.assertIsNotNone(agent) self.assertEqual(len(sup.agents), 2, "Agent not registered supervisor after onboarding") self.assertEqual(len(task_runner.running_units), 2, "Task not launched after onboarding") agents = [a.agent for a in sup.agents.values()] # Make both agents act agent_id_1, agent_id_2 = agents[0].db_id, agents[1].db_id agent_1_data = agents[0].datastore.agent_data[agent_id_1] agent_2_data = agents[1].datastore.agent_data[agent_id_2] 
self.architect.server.send_agent_act(agent_id_1, {"text": "message1"}) self.architect.server.send_agent_act(agent_id_2, {"text": "message2"}) # Give up to 1 seconds for the actual operation to occur start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(agent_1_data["acts"]) > 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not process messages in time") # Give up to 1 seconds for the task to complete afterwards start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if len(task_runner.running_units) == 0: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Did not complete task in time") # Give up to 1 seconds for all messages to propogate start_time = time.time() TIMEOUT_TIME = 1 while time.time() - start_time < TIMEOUT_TIME: if self.architect.server.actions_observed == 2: break time.sleep(0.1) self.assertLess(time.time() - start_time, TIMEOUT_TIME, "Not all actions observed in time") sup.shutdown() self.assertTrue(channel_info.channel.is_closed())
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""
Utility script that finds, expires, and disposes HITs that may not
have been taken down during a run that exited improperly.
"""

from mephisto.providers.mturk.mturk_utils import (
    get_outstanding_hits,
    expire_and_dispose_hits,
)
from mephisto.core.local_database import LocalMephistoDB

db = LocalMephistoDB()

# Collect requesters registered under both the "mturk" and the
# "mturk_sandbox" provider types.
all_requesters = db.find_requesters(provider_type="mturk")
all_requesters += db.find_requesters(provider_type="mturk_sandbox")
r_names = [r.requester_name for r in all_requesters]

# With no registered requesters no input could ever satisfy the selection
# loop below, so stop here instead of prompting forever.
if not r_names:
    raise SystemExit(
        "No mturk or mturk_sandbox requesters are registered; nothing to clear."
    )

print(
    "You have the following requesters available for mturk and mturk sandbox:")
print(sorted(r_names))

# Keep asking until the answer matches one of the known requester names.
use_name = input("Enter the name of the requester to clear HITs from:\n>> ")
while use_name not in r_names:
    use_name = input(f"Sorry, {use_name} is not in the requester list. "
                     f"The following are valid: {r_names}\n"
                     f"Select one:\n>> ")
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. from mephisto.core.local_database import LocalMephistoDB from mephisto.core.data_browser import DataBrowser as MephistoDataBrowser from mephisto.data_model.worker import Worker from mephisto.data_model.assignment import Unit db = LocalMephistoDB() mephisto_data_browser = MephistoDataBrowser(db=db) DO_REVIEW = True units = mephisto_data_browser.get_units_for_task_name( input("Input task name: ")) tasks_to_show = input("Tasks to see? (a)ll/(u)nreviewed: ") if tasks_to_show in ["all", "a"]: DO_REVIEW = False else: units = [u for u in units if u.get_status() == "completed"] print( "You will be reviewing actual tasks with this flow. Tasks that you either Accept or Pass " "will be paid out to the worker, while rejected tasks will not. Passed tasks will be " "specially marked such that you can leave them out of your dataset. \n" "When you pass on a task, the script gives you an option to disqualify the worker " "from future tasks by assigning a qualification. If provided, this worker will no " "longer be able to work on tasks where the set --block-qualification shares the same name.\n"
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""
Interactive script that reads MTurk worker ids from the terminal and passes
them to `direct_soft_block_mturk_workers` together with the chosen requester
name and soft blocking qualification name.
"""

from mephisto.providers.mturk.utils.script_utils import direct_soft_block_mturk_workers
from mephisto.core.local_database import LocalMephistoDB

db = LocalMephistoDB()
reqs = db.find_requesters(provider_type="mturk")
names = [r.requester_name for r in reqs]
print("Available Requesters: ", names)

requester_name = input("Select a requester to soft block from: ")
soft_block_qual_name = input("Provide a soft blocking qualification name: ")

# Gather ids one per prompt; an empty (or whitespace-only) entry ends the list.
workers_to_block = []
while True:
    # Strip once and keep the stripped value, so an id pasted with stray
    # spaces or tabs is not forwarded with that whitespace attached.
    new_id = input(
        "MTurk Worker Id to soft block (blank to block all entered): ").strip()
    if not new_id:
        break
    workers_to_block.append(new_id)

direct_soft_block_mturk_workers(db, workers_to_block, soft_block_qual_name,
                                requester_name)
def __init__(self, db=None):
    """
    Remember the database this object works against.

    :param db: database handle to store on `self.db`; when it is None a
        `LocalMephistoDB()` built with no arguments is stored instead.
    """
    # Identity test on purpose: any value other than None is kept untouched.
    self.db = LocalMephistoDB() if db is None else db
class CrowdsourcingTestMixin:
    """
    Mixin for end-to-end tests of Mephisto-based crowdsourcing tasks.

    Allows for setup and teardown of the operator, as well as for config specification
    and agent registration.
    """

    def setUp(self):
        """
        Mark the operator as not yet created; `_set_up_server()` builds it on demand.
        """
        self.operator = None

    def tearDown(self):
        """
        Shut down the operator if one was started and delete the temporary data directory.
        """
        if self.operator is not None:
            self.operator.shutdown()

        # `_set_up_config()` creates `self.data_dir` with `tempfile.mkdtemp()`, which
        # is never cleaned up automatically. `getattr` covers tests that never called
        # `_set_up_config()`.
        data_dir = getattr(self, "data_dir", None)
        if data_dir is not None:
            import shutil  # local import: only needed for this cleanup step

            # Best-effort removal: a directory that is already gone (or cannot be
            # fully removed) must not turn a passing test into an error.
            shutil.rmtree(data_dir, ignore_errors=True)

    def _set_up_config(
        self,
        blueprint_type: str,
        task_directory: str,
        overrides: Optional[List[str]] = None,
    ):
        """
        Set up the config and database.

        Uses the Hydra compose() API for unit testing and a temporary directory to
        store the test database.

        :param blueprint_type: string uniquely specifying Blueprint class
        :param task_directory: directory containing the `conf/` configuration folder.
          Will be injected as `${task_dir}` in YAML files.
        :param overrides: additional config overrides
        """

        # Define the configuration settings. The config path handed to
        # `initialize()` is expressed relative to this file's directory.
        relative_task_directory = os.path.relpath(task_directory,
                                                  os.path.dirname(__file__))
        relative_config_path = os.path.join(relative_task_directory, 'conf')
        if overrides is None:
            overrides = []
        with initialize(config_path=relative_config_path):
            self.config = compose(
                config_name="example",
                overrides=[
                    f'+mephisto.blueprint._blueprint_type={blueprint_type}',
                    '+mephisto/architect=mock',
                    '+mephisto/provider=mock',
                    f'+task_dir={task_directory}',
                    f'+current_time={int(time.time())}',
                ] + overrides,
            )
            # TODO: when Hydra 1.1 is released with support for recursive defaults,
            #  don't manually specify all missing blueprint args anymore, but
            #  instead define the blueprint in the defaults list directly.
            #  Currently, the blueprint can't be set in the defaults list without
            #  overriding params in the YAML file, as documented at
            #  https://github.com/facebookresearch/hydra/issues/326 and as fixed in
            #  https://github.com/facebookresearch/hydra/pull/1044.

            # The test database lives in a fresh temporary directory, which
            # tearDown() removes again.
            self.data_dir = tempfile.mkdtemp()
            database_path = os.path.join(self.data_dir, "mephisto.db")
            self.db = LocalMephistoDB(database_path)
            self.config = augment_config_from_db(self.config, self.db)
            self.config.mephisto.architect.should_run_server = True

    def _set_up_server(self, shared_state: Optional[SharedTaskState] = None):
        """
        Set up the operator and server.

        Requires `_set_up_config()` to have run first, since it reads `self.db` and
        `self.config`.

        :param shared_state: forwarded to `Operator.validate_and_run_config()`
        """
        self.operator = Operator(self.db)
        self.operator.validate_and_run_config(self.config.mephisto,
                                              shared_state=shared_state)
        # Take the server from the first channel the supervisor holds.
        channel_info = list(self.operator.supervisor.channels.values())[0]
        self.server = channel_info.job.architect.server

    def _register_mock_agents(self, num_agents: int = 1) -> List[str]:
        """
        Register mock agents for testing, taking the place of crowdsourcing workers.

        Specify the number of agents to register. Return the agents' IDs after
        creation.

        :param num_agents: how many worker/agent pairs to register
        :return: the `db_id` of every agent returned by `self.db.find_agents()`
        """

        for idx in range(num_agents):

            # Register the worker
            mock_worker_name = f"MOCK_WORKER_{idx:d}"
            self.server.register_mock_worker(mock_worker_name)
            workers = self.db.find_workers(worker_name=mock_worker_name)
            worker_id = workers[0].db_id

            # Register the agent
            mock_agent_details = f"FAKE_ASSIGNMENT_{idx:d}"
            self.server.register_mock_agent(worker_id, mock_agent_details)

        # Get all agents' IDs
        return [agent.db_id for agent in self.db.find_agents()]