示例#1
0
 def _create_single_assignment(self, assignment_data) -> None:
     """
     Register one assignment, plus one unit per entry of its "unit_data",
     in the database and track the resulting objects on this instance.

     The assignment is appended to ``self.assignments``; every unit is
     appended to ``self.units`` and also stored in ``self.unlaunched_units``
     keyed by its id.
     """
     db = self.db
     run = self.task_run
     config = run.get_task_config()

     new_assignment_id = db.new_assignment(
         run.task_id,
         run.db_id,
         run.requester_id,
         run.task_type,
         run.provider_type,
         run.sandbox,
     )
     created = Assignment(db, new_assignment_id)
     created.write_assignment_data(assignment_data)
     self.assignments.append(created)

     # One unit is created for every entry in the assignment's "unit_data"
     for index in range(len(assignment_data["unit_data"])):
         new_unit_id = db.new_unit(
             run.task_id,
             run.db_id,
             run.requester_id,
             new_assignment_id,
             index,
             config.task_reward,
             run.provider_type,
             run.task_type,
             run.sandbox,
         )
         self.units.append(Unit(db, new_unit_id))
         # The unlaunched map is only modified while holding its condition
         with self.unlaunched_units_access_condition:
             self.unlaunched_units[new_unit_id] = Unit(db, new_unit_id)
示例#2
0
    def get_unit(self) -> "Unit":
        """
        Return the Unit this agent is working on, loading it from the
        database on first access and caching it in ``self._unit``.
        """
        cached = self._unit
        if cached is not None:
            return cached

        # Imported inside the method rather than at module level
        from mephisto.data_model.assignment import Unit

        cached = Unit(self.db, self.unit_id)
        self._unit = cached
        return cached
示例#3
0
def get_mturk_ids_from_unit_id(db, unit_id: str) -> Dict[str, Optional[str]]:
    """
    Look up the MTurk identifiers tied to the given mephisto unit id.

    Returns a dict with the keys "assignment_id", "hit_id" and "worker_id".
    "worker_id" is None when the unit has no assigned agent.
    """
    unit = Unit(db, unit_id)
    ids: Dict[str, Optional[str]] = {
        "assignment_id": unit.get_mturk_assignment_id(),
        "hit_id": unit.get_mturk_hit_id(),
        "worker_id": None,
    }
    assigned_agent = unit.get_assigned_agent()
    if assigned_agent is not None:
        # The worker id is only reachable through the assigned agent
        ids["worker_id"] = assigned_agent.get_worker().get_mturk_worker_id()
    return ids
    def test_unit_fails(self) -> None:
        """Units must not be loadable or creatable under failure conditions"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Loading an id that was never created must raise
        with self.assertRaises(EntryDoesNotExistException):
            Unit(db, self.get_fake_id("Unit"))

        assignment = Assignment(db, get_test_assignment(db))

        def create_unit(for_assignment_id):
            """Create unit index 0 with a fixed pay under the given assignment id"""
            return db.new_unit(
                assignment.task_id,
                assignment.task_run_id,
                assignment.requester_id,
                for_assignment_id,
                0,
                15.0,
                PROVIDER_TYPE,
                assignment.sandbox,
            )

        # An assignment id that does not exist is rejected
        with self.assertRaises(EntryDoesNotExistException):
            create_unit(self.get_fake_id("Assignment"))

        # The first creation against the real assignment succeeds
        create_unit(assignment.db_id)

        # Creating the same unit a second time is rejected
        with self.assertRaises(EntryAlreadyExistsException):
            create_unit(assignment.db_id)

        # Only the single successful creation may be present
        self.assertEqual(len(db.find_units()), 1)
示例#5
0
def make_completed_unit(db: MephistoDB) -> str:
    """
    Create a unit whose agent has been marked done, and return the unit id.

    The unit is attached to the last task run returned by
    ``db.find_task_runs(is_completed=False)`` and worked on by the last
    worker returned by ``db.find_workers()``. Assumes at least one worker
    and one incomplete task run already exist; asserts otherwise.
    """
    all_workers = db.find_workers()
    assert len(all_workers) > 0, "Must have at least one worker in database"
    chosen_worker = all_workers[-1]

    open_runs = db.find_task_runs(is_completed=False)
    assert len(open_runs) > 0, "Must be at least one incomplete task run"
    run = open_runs[-1]

    # Build the assignment -> unit -> agent chain inside the chosen run
    assignment_id = db.new_assignment(
        run.task_id,
        run.db_id,
        run.requester_id,
        run.task_type,
        run.provider_type,
    )
    new_unit_id = db.new_unit(
        run.task_id,
        run.db_id,
        run.requester_id,
        assignment_id,
        0,
        0.2,
        run.provider_type,
        run.task_type,
    )
    new_agent_id = db.new_agent(
        chosen_worker.db_id,
        new_unit_id,
        run.task_id,
        run.db_id,
        assignment_id,
        run.task_type,
        run.provider_type,
    )

    # Finish the agent first, then let the unit pick up the new state
    Agent(db, new_agent_id).mark_done()
    finished_unit = Unit(db, new_unit_id)
    finished_unit.sync_status()
    return finished_unit.db_id
示例#6
0
 def get_data_from_unit(self, unit: Unit) -> Dict[str, Any]:
     """
     Collect the identifiers, agent status and agent state data for a unit.

     The unit must have an assigned agent; an AssertionError is raised
     otherwise.
     """
     assigned = unit.get_assigned_agent()
     assert (
         assigned is not None
     ), f"Trying to get completed data from unassigned unit {unit}"

     summary: Dict[str, Any] = {
         "worker_id": assigned.worker_id,
         "unit_id": unit.db_id,
         "assignment_id": unit.assignment_id,
         "status": assigned.db_status,
     }
     # The remaining fields all come from the agent's state object
     agent_state = assigned.state
     summary["data"] = agent_state.get_parsed_data()
     summary["task_start"] = agent_state.get_task_start()
     summary["task_end"] = agent_state.get_task_end()
     return summary
    def test_unit(self) -> None:
        """Create a unit, then verify it can be fetched and searched for"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Set up the assignment the unit will belong to
        assignment_id = get_test_assignment(db)
        parent = Assignment(db, assignment_id)
        expected_pay = 15.0

        unit_id = db.new_unit(
            parent.task_id,
            parent.task_run_id,
            parent.requester_id,
            parent.db_id,
            0,
            expected_pay,
            PROVIDER_TYPE,
            parent.sandbox,
        )
        self.assertIsNotNone(unit_id)
        self.assertTrue(isinstance(unit_id, str))

        # The stored row reflects the creation arguments
        row = db.get_unit(unit_id)
        self.assertEqual(row["assignment_id"], assignment_id)
        self.assertEqual(row["pay_amount"], expected_pay)
        self.assertEqual(row["status"], AssignmentState.CREATED)

        # The object wrapper exposes the same assignment link
        self.assertEqual(Unit(db, unit_id).assignment_id, assignment_id)

        # An unfiltered search and a search by assignment must both
        # return exactly the unit created above
        for filters in ({}, {"assignment_id": assignment_id}):
            found = db.find_units(**filters)
            self.assertEqual(len(found), 1)
            only_unit = found[0]
            self.assertTrue(isinstance(only_unit, Unit))
            self.assertEqual(only_unit.db_id, unit_id)
            self.assertEqual(only_unit.assignment_id, assignment_id)
            self.assertEqual(only_unit.pay_amount, expected_pay)

        # Searching under an unknown assignment finds nothing
        missing = db.find_units(assignment_id=self.get_fake_id("Assignment"))
        self.assertEqual(len(missing), 0)
    def test_agent(self) -> None:
        """Create an agent, then verify it can be fetched and searched for"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # An agent needs an existing worker and unit
        _worker_name, worker_id = get_test_worker(db)
        unit_id = get_test_unit(db)
        unit = Unit(db, unit_id)

        agent_id = db.new_agent(
            worker_id,
            unit_id,
            unit.task_id,
            unit.task_run_id,
            unit.assignment_id,
            unit.task_type,
            unit.provider_type,
        )
        self.assertIsNotNone(agent_id)
        self.assertTrue(isinstance(agent_id, str))

        # The stored row reflects the creation arguments
        row = db.get_agent(agent_id)
        self.assertEqual(row["worker_id"], worker_id)
        self.assertEqual(row["unit_id"], unit_id)
        self.assertEqual(row["status"], AgentState.STATUS_NONE)

        # Creating the agent must have moved the unit to the assigned state
        assigned_units = db.find_units(status=AssignmentState.ASSIGNED)
        self.assertEqual(len(assigned_units), 1)

        # The object wrapper exposes the same worker link
        self.assertEqual(Agent(db, agent_id).worker_id, worker_id)

        # An unfiltered search and a search by worker must both
        # return exactly the agent created above
        for filters in ({}, {"worker_id": worker_id}):
            found = db.find_agents(**filters)
            self.assertEqual(len(found), 1)
            only_agent = found[0]
            self.assertTrue(isinstance(only_agent, Agent))
            self.assertEqual(only_agent.db_id, agent_id)
            self.assertEqual(only_agent.worker_id, worker_id)

        # Searching under an unknown worker finds nothing
        missing = db.find_agents(worker_id=self.get_fake_id("Worker"))
        self.assertEqual(len(missing), 0)
示例#9
0
def get_test_agent(db: MephistoDB, unit_id=None) -> str:
    """
    Create an agent for a test worker on a unit and return the new agent id.

    Args:
        db: database in which the worker, unit and agent are created.
        unit_id: id of the unit the agent works on. When None, a fresh
            unit is created with ``get_test_unit``.

    Returns:
        The id returned by ``db.new_agent``.
    """
    _worker_name, worker_id = get_test_worker(db)
    if unit_id is None:
        unit_id = get_test_unit(db)
    unit = Unit(db, unit_id)
    # Task and provider types are taken from the unit itself so the agent
    # always matches the unit it is attached to.
    return db.new_agent(
        worker_id,
        unit.db_id,
        unit.task_id,
        unit.task_run_id,
        unit.assignment_id,
        unit.task_type,
        unit.provider_type,
    )
    def test_agent_fails(self) -> None:
        """Agents must not be loadable or creatable under failure conditions"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Loading an id that was never created must raise
        with self.assertRaises(EntryDoesNotExistException):
            Agent(db, self.get_fake_id("Agent"))

        unit_id = get_test_unit(db)
        _worker_name, worker_id = get_test_worker(db)
        unit = Unit(db, unit_id)

        def register(for_worker_id, for_unit_id):
            """Try to create an agent using the real unit's remaining fields"""
            return db.new_agent(
                for_worker_id,
                for_unit_id,
                unit.task_id,
                unit.task_run_id,
                unit.assignment_id,
                unit.task_type,
                unit.provider_type,
            )

        # A worker id that does not exist is rejected
        with self.assertRaises(EntryDoesNotExistException):
            register(self.get_fake_id("Worker"), unit_id)

        # A unit id that does not exist is rejected
        with self.assertRaises(EntryDoesNotExistException):
            register(worker_id, self.get_fake_id("Unit"))

        # None of the failed attempts may have left an agent behind
        self.assertEqual(len(db.find_agents()), 0)
def format_for_printing_data(data):
    """
    Render one unit's submitted data as a printable multi-line string.

    Custom tasks can define methods like this one to display their data in
    a relevant way. The output lists worker/unit metadata, the character
    inputs, the rating, and any attached files with their directory.
    """
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    times = contents["times"]
    duration = times["task_end"] - times["task_start"]

    # Every entry becomes one output line; the separator goes first
    lines = [
        "-------------------",
        f"Worker: {worker_name}",
        f"Unit: {data['unit_id']}",
        f"Duration: {int(duration)}",
        f"Status: {data['status']}",
    ]

    inputs = contents["inputs"]
    lines.append(f"Character: {inputs['character_name']}")
    lines.append(f"Description: {inputs['character_description']}")

    outputs = contents["outputs"]
    lines.append(f"   Rating: {outputs['rating']}")

    attached = outputs.get("files")
    if attached is None:
        lines.append("   Files: No files attached")
    else:
        # Files live in the data directory of the unit's assigned agent
        data_dir = Unit(db, data["unit_id"]).get_assigned_agent().get_data_dir()
        lines.append(f"   Files: {attached}")
        lines.append(f"   File directory {data_dir}")

    return "\n".join(lines) + "\n"
示例#12
0
class Agent(ABC):
    """
    This class encompasses a worker as they are working on an individual assignment.
    It maintains details for the current task at hand such as start and end time,
    connection status, etc.

    Constructing ``Agent(db, db_id)`` directly does not yield a plain Agent:
    ``__new__`` looks up the agent's provider type and instantiates that
    crowd provider's ``AgentClass`` instead.
    """
    def __init__(self,
                 db: "MephistoDB",
                 db_id: str,
                 row: Optional[Mapping[str, Any]] = None):
        """
        Load this agent's fields from ``row``, or from ``db.get_agent(db_id)``
        when no row is supplied, and set up the in-memory packet queues and
        threading events used to coordinate with the frontend.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        self.db_id: str = row["agent_id"]
        self.db_status = row["status"]
        self.worker_id = row["worker_id"]
        self.unit_id = row["unit_id"]
        self.task_type = row["task_type"]
        self.provider_type = row["provider_type"]
        # Packets queued by observe() and packets consumed by act()
        self.pending_observations: List["Packet"] = []
        self.pending_actions: List["Packet"] = []
        # has_action is waited on by act(); it is set here only for
        # returned/disconnected statuses, so whatever enqueues actions is
        # presumably responsible for setting it too (not shown in this class)
        self.has_action = threading.Event()
        self.has_action.clear()
        # wants_action is set by act() when it finds no pending actions
        self.wants_action = threading.Event()
        self.wants_action.clear()
        # has_updated_status is set whenever the status changes
        self.has_updated_status = threading.Event()
        self.assignment_id = row["assignment_id"]
        self.task_run_id = row["task_run_id"]
        self.task_id = row["task_id"]
        # did_submit is set when act() sees a submit packet, and on
        # returned/disconnected statuses in update_status()
        self.did_submit = threading.Event()

        # Deferred loading of related entities
        self._worker: Optional["Worker"] = None
        self._unit: Optional["Unit"] = None
        self._assignment: Optional["Assignment"] = None
        self._task_run: Optional["TaskRun"] = None
        self._task: Optional["Task"] = None

        # Follow-up initialization
        self.state = AgentState(self)  # type: ignore

    def __new__(cls,
                db: "MephistoDB",
                db_id: str,
                row: Optional[Mapping[str, Any]] = None) -> "Agent":
        """
        The new method is overridden to be able to automatically generate
        the expected Agent class without needing to specifically find it
        for a given db_id. As such it is impossible to create a base Agent
        as you will instead be returned the correct Agent class according to
        the crowdprovider associated with this Agent.
        """
        from mephisto.core.registry import get_crowd_provider_from_type

        if cls == Agent:
            # We are trying to construct a Agent, find what type to use and
            # create that instead
            if row is None:
                row = db.get_agent(db_id)
            assert row is not None, f"Given db_id {db_id} did not exist in given db"
            correct_class = get_crowd_provider_from_type(
                row["provider_type"]).AgentClass
            return super().__new__(correct_class)
        else:
            # We are constructing another instance directly
            return super().__new__(cls)

    def get_agent_id(self) -> str:
        """Return this agent's id"""
        return self.db_id

    def get_worker(self) -> Worker:
        """
        Return the worker that is using this agent for a task
        """
        # Loaded on first access and cached afterwards
        if self._worker is None:
            self._worker = Worker(self.db, self.worker_id)
        return self._worker

    def get_unit(self) -> "Unit":
        """
        Return the Unit that this agent is working on.
        """
        if self._unit is None:
            from mephisto.data_model.assignment import Unit

            self._unit = Unit(self.db, self.unit_id)
        return self._unit

    def get_assignment(self) -> "Assignment":
        """Return the assignment this agent is working on"""
        if self._assignment is None:
            # Go through the unit when it is already loaded, otherwise
            # load the assignment directly by id
            if self._unit is not None:
                self._assignment = self._unit.get_assignment()
            else:
                from mephisto.data_model.assignment import Assignment

                self._assignment = Assignment(self.db, self.assignment_id)
        return self._assignment

    def get_task_run(self) -> "TaskRun":
        """Return the TaskRun this agent is working within"""
        if self._task_run is None:
            # Prefer any related entity that is already loaded before
            # falling back to loading the task run by id
            if self._unit is not None:
                self._task_run = self._unit.get_task_run()
            elif self._assignment is not None:
                self._task_run = self._assignment.get_task_run()
            else:
                from mephisto.data_model.task import TaskRun

                self._task_run = TaskRun(self.db, self.task_run_id)
        return self._task_run

    def get_task(self) -> "Task":
        """Return the Task this agent is working within"""
        if self._task is None:
            # Prefer any related entity that is already loaded before
            # falling back to loading the task by id
            if self._unit is not None:
                self._task = self._unit.get_task()
            elif self._assignment is not None:
                self._task = self._assignment.get_task()
            elif self._task_run is not None:
                self._task = self._task_run.get_task()
            else:
                from mephisto.data_model.task import Task

                self._task = Task(self.db, self.task_id)
        return self._task

    def get_data_dir(self) -> str:
        """
        Return the directory to be storing any agent state for
        this agent into. It is the assignment's data directory joined
        with this agent's id.
        """
        assignment_dir = self.get_assignment().get_data_dir()
        return os.path.join(assignment_dir, self.db_id)

    def update_status(self, new_status: str) -> None:
        """Update the database status of this agent, and
        possibly send a message to the frontend agent informing
        them of this update"""
        if self.db_status == new_status:
            return  # Noop, this is already the case
        if self.db_status in AgentState.complete():
            # Only a warning: the update below is still applied
            print(f"Updating a final status, was {self.db_status} "
                  f"and want to set to {new_status}")
        self.db.update_agent(self.db_id, status=new_status)
        self.db_status = new_status
        self.has_updated_status.set()
        if new_status in [
                AgentState.STATUS_RETURNED, AgentState.STATUS_DISCONNECT
        ]:
            # Disconnect statuses should free any pending acts
            self.has_action.set()
            self.did_submit.set()

    @staticmethod
    def _register_agent(db: "MephistoDB", worker: Worker, unit: "Unit",
                        provider_type: str) -> "Agent":
        """
        Create this agent in the mephisto db with the correct setup

        The new row uses the unit's task, task run, assignment and task type,
        but the ``provider_type`` given here. The agent is returned after
        being moved to ``AgentState.STATUS_ACCEPTED``.
        """
        db_id = db.new_agent(
            worker.db_id,
            unit.db_id,
            unit.task_id,
            unit.task_run_id,
            unit.assignment_id,
            unit.task_type,
            provider_type,
        )
        a = Agent(db, db_id)
        a.update_status(AgentState.STATUS_ACCEPTED)
        return a

    # Specialized child cases may need to implement the following

    @classmethod
    def new_from_provider_data(
        cls,
        db: "MephistoDB",
        worker: Worker,
        unit: "Unit",
        provider_data: Dict[str, Any],
    ) -> "Agent":
        """
        Wrapper around the new method that allows registering additional
        bookkeeping information from a crowd provider for this agent

        This base implementation does not read ``provider_data``.
        """
        agent = cls.new(db, worker, unit)
        # Record the worker on the in-memory unit and seed the agent's
        # unit cache so get_unit() returns this same object
        unit.worker_id = worker.db_id
        agent._unit = unit
        return agent

    def observe(self, packet: "Packet") -> None:
        """
        Pass the observed information to the AgentState, then
        queue the information to be pushed to the user
        """
        # Work on a copy so the caller's packet is not modified
        sending_packet = packet.copy()
        sending_packet.receiver_id = self.db_id
        self.state.update_data(sending_packet)
        self.pending_observations.append(sending_packet)

    def act(self, timeout: Optional[int] = None) -> Optional["Packet"]:
        """
        Request information from the Agent's frontend. If non-blocking
        (timeout is None or 0), returns None if no actions are ready
        to be returned.

        With a positive timeout, waits up to that long for an action.
        If none arrives, raises AgentDisconnectedError or AgentReturnedError
        when the agent has that status; otherwise marks the agent as timed
        out and raises AgentTimeoutError.
        """
        if len(self.pending_actions) == 0:
            # Signal that an action is wanted before deciding whether to wait
            self.wants_action.set()
            if timeout is None or timeout == 0:
                return None
            self.has_action.wait(timeout)

        if len(self.pending_actions) == 0:
            # various disconnect cases
            status = self.get_status()
            if status == AgentState.STATUS_DISCONNECT:
                raise AgentDisconnectedError(self.db_id)
            elif status == AgentState.STATUS_RETURNED:
                raise AgentReturnedError(self.db_id)
            self.update_status(AgentState.STATUS_TIMEOUT)
            raise AgentTimeoutError(timeout, self.db_id)
        assert len(
            self.pending_actions) > 0, "has_action released without an action!"

        # Actions are consumed oldest-first
        act = self.pending_actions.pop(0)

        if "MEPHISTO_is_submit" in act.data and act.data["MEPHISTO_is_submit"]:
            self.did_submit.set()

        # Reset the event once the queue is drained so the next act() waits
        if len(self.pending_actions) == 0:
            self.has_action.clear()
        self.state.update_data(act)
        return act

    def get_status(self) -> str:
        """Get the status of this agent in their work on their unit"""
        # The database is only re-read while the cached status is not
        # one of AgentState.complete()
        if self.db_status not in AgentState.complete():
            row = self.db.get_agent(self.db_id)
            if row["status"] != self.db_status:
                if row["status"] in [
                        AgentState.STATUS_RETURNED,
                        AgentState.STATUS_DISCONNECT,
                ]:
                    # Disconnect statuses should free any pending acts
                    self.has_action.set()
                self.has_updated_status.set()
            self.db_status = row["status"]
        return self.db_status

    # Children classes should implement the following methods

    def approve_work(self) -> None:
        """Approve the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def soft_reject_work(self) -> None:
        """
        Pay a worker for attempted work, but mark it as below the
        quality bar for this assignment
        """
        # TODO(OWN) extend this method to assign a soft block
        # qualification automatically if a threshold of
        # soft rejects as a proportion of total accepts
        # is exceeded
        self.approve_work()
        self.update_status(AgentState.STATUS_SOFT_REJECTED)

    def reject_work(self, reason) -> None:
        """Reject the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def mark_done(self) -> None:
        """
        Take any required step with the crowd_provider to ensure that
        the worker can submit their work and be marked as complete via
        a call to get_status
        """
        raise NotImplementedError()

    @staticmethod
    def new(db: "MephistoDB", worker: Worker, unit: "Unit") -> "Agent":
        """
        Create an agent for this worker to be used for work on the given Unit.

        Implementation should return the result of _register_agent when sure the agent
        can be successfully created to have it put into the db.
        """
        raise NotImplementedError()
示例#13
0
def get_submitted_data():
    """
    Return, as JSON, the units matching the request's query arguments
    together with any data recorded by their assigned agent.

    Query arguments (each may be given multiple times):
        task_run_id: include the units of every assignment in these runs.
        assignment_id: include the units of these assignments.
        unit_ids: include these units directly, regardless of status.
        status: assignment statuses to keep; defaults to completed,
            accepted and rejected when none are given.
        task_name: not supported yet; supplying it yields an error response.

    Returns:
        ``{"success": True, "units": [...]}`` on success, otherwise
        ``{"success": False, "msg": <error text>}``. Any exception is
        caught, its traceback printed, and reported through the latter.
    """
    try:
        task_run_ids = request.args.getlist("task_run_id")
        task_names = request.args.getlist("task_name")
        assignment_ids = request.args.getlist("assignment_id")
        unit_ids = request.args.getlist("unit_ids")
        statuses = request.args.getlist("status")

        db = app.extensions["db"]
        units = []
        assignments = []
        assert len(
            task_names) == 0, "Searching via task names not yet supported"

        task_runs = [TaskRun(db, task_run_id) for task_run_id in task_run_ids]
        for task_run in task_runs:
            assignments += task_run.get_assignments()

        assignments += [
            Assignment(db, assignment_id) for assignment_id in assignment_ids
        ]

        if len(statuses) == 0:
            statuses = [
                AssignmentState.COMPLETED,
                AssignmentState.ACCEPTED,
                AssignmentState.REJECTED,
            ]

        filtered_assignments = [
            a for a in assignments if a.get_status() in statuses
        ]

        # Only assignments that passed the status filter contribute units.
        # Iterating the unfiltered list here made the filter a no-op.
        for assignment in filtered_assignments:
            units += assignment.get_units()

        # Units requested explicitly by id are always included
        units += [Unit(db, unit_id) for unit_id in unit_ids]

        all_unit_data = []
        for unit in units:
            unit_data = {
                "assignment_id": unit.assignment_id,
                "task_run_id": unit.task_run_id,
                "status": unit.db_status,
                "unit_id": unit.db_id,
                "worker_id": unit.worker_id,
                "data": None,
            }
            agent = unit.get_assigned_agent()
            if agent is not None:
                # The assigned agent is the source of the submitted data
                # and overrides the worker id stored on the unit
                unit_data["data"] = agent.state.get_data()
                unit_data["worker_id"] = agent.worker_id
            all_unit_data.append(unit_data)

        print(all_unit_data)
        return jsonify({"success": True, "units": all_unit_data})
    except Exception as e:
        import traceback

        traceback.print_exc()
        return jsonify({"success": False, "msg": str(e)})