Пример #1
0
 def _create_single_assignment(self, assignment_data) -> None:
     """
     Register one assignment and its units in the database.

     The assignment is created from self.task_run, assignment_data is written
     to it, and it is appended to self.assignments. One unit is then created
     per entry of assignment_data.unit_data; each is appended to self.units
     and also stored in self.unlaunched_units (keyed by unit id) while
     holding self.unlaunched_units_access_condition.
     """
     run = self.task_run
     config = run.get_task_config()
     assignment_id = self.db.new_assignment(
         run.task_id,
         run.db_id,
         run.requester_id,
         run.task_type,
         run.provider_type,
         run.sandbox,
     )
     new_assignment = Assignment(self.db, assignment_id)
     new_assignment.write_assignment_data(assignment_data)
     self.assignments.append(new_assignment)

     for unit_idx in range(len(assignment_data.unit_data)):
         unit_id = self.db.new_unit(
             run.task_id,
             run.db_id,
             run.requester_id,
             assignment_id,
             unit_idx,
             config.task_reward,
             run.provider_type,
             run.task_type,
             run.sandbox,
         )
         # Two separate Unit objects are built for the same id: one for the
         # full unit list and one for the unlaunched-unit mapping.
         tracked_unit = Unit(self.db, unit_id)
         self.units.append(tracked_unit)
         with self.unlaunched_units_access_condition:
             pending_unit = Unit(self.db, unit_id)
             self.unlaunched_units[unit_id] = pending_unit
Пример #2
0
    def get_assignment(self) -> "Assignment":
        """
        Return the Assignment this Unit belongs to.

        The Assignment is built from self.assignment_id on the first call and
        cached on the instance for subsequent calls.
        """
        if self.__assignment is not None:
            return self.__assignment

        # Imported at call time rather than module level
        # (presumably to avoid a circular import - TODO confirm).
        from mephisto.data_model.assignment import Assignment

        self.__assignment = Assignment(self.db, self.assignment_id)
        return self.__assignment
Пример #3
0
    def get_assignment(self) -> "Assignment":
        """
        Return the assignment this agent is working on.

        The result is cached. On a cache miss the assignment is taken from the
        already-loaded unit when there is one, otherwise it is built from
        self.assignment_id.
        """
        if self._assignment is not None:
            return self._assignment

        if self._unit is None:
            from mephisto.data_model.assignment import Assignment

            self._assignment = Assignment(self.db, self.assignment_id)
        else:
            self._assignment = self._unit.get_assignment()
        return self._assignment
Пример #4
0
 def get_test_assignment(self) -> Assignment:
     """
     Create a test assignment for self.task_run using mock agents.

     Registers a new assignment, a single unit on it (unit index 0, pay
     amount 0), a mock worker, and an agent linking the worker to the unit.

     Returns:
         The Assignment wrapping the newly created assignment row.
     """
     task_run = self.task_run
     assignment_id = self.db.new_assignment(
         task_run.task_id,
         task_run.db_id,
         task_run.requester_id,
         task_run.task_type,
         task_run.provider_type,
     )
     assign = Assignment(self.db, assignment_id)
     unit_id = self.db.new_unit(
         task_run.task_id,
         task_run.db_id,
         task_run.requester_id,
         assignment_id,
         0,  # unit index
         0,  # pay amount
         task_run.provider_type,
         task_run.task_type,
     )
     # The mock unit is still constructed so its constructor runs, but the
     # instance is not needed afterwards and is therefore not bound to a name.
     MockUnit(self.db, unit_id)
     worker_id = self.db.new_worker("MOCK_TEST_WORKER", MOCK_PROVIDER_TYPE)
     worker = MockWorker(self.db, worker_id)
     agent_id = self.db.new_agent(
         worker.db_id,
         unit_id,
         task_run.task_id,
         task_run.db_id,
         assignment_id,
         task_run.task_type,
         task_run.provider_type,
     )
     # Same as above: construct the mock agent without binding it, which also
     # avoids hiding the Agent class name behind a local variable.
     MockAgent(self.db, agent_id)
     return assign
Пример #5
0
def get_submitted_data():
    """
    Return a JSON description of the units selected by the request's query args.

    Query args read (each may be repeated):
        task_run_id:   every assignment of these task runs is considered.
        assignment_id: these assignments are considered as well.
        status:        assignment statuses to keep; when omitted, defaults to
                       COMPLETED, ACCEPTED and REJECTED.
        unit_ids:      units added directly, without any status filtering.
        task_name:     not supported; supplying any value produces an error
                       response.

    Returns:
        ``{"success": True, "units": [...]}`` where each entry holds the unit's
        assignment_id, task_run_id, status, unit_id, worker_id and data (the
        assigned agent's state data, or None when no agent is assigned).
        On any exception the traceback is printed and
        ``{"success": False, "msg": str(e)}`` is returned instead.
    """
    try:
        task_run_ids = request.args.getlist("task_run_id")
        task_names = request.args.getlist("task_name")
        assignment_ids = request.args.getlist("assignment_id")
        unit_ids = request.args.getlist("unit_ids")
        statuses = request.args.getlist("status")

        db = app.extensions["db"]
        units = []
        assignments = []
        # Explicit raise instead of assert so the check survives `python -O`;
        # the message reported to the client is unchanged.
        if len(task_names) != 0:
            raise NotImplementedError("Searching via task names not yet supported")

        task_runs = [TaskRun(db, task_run_id) for task_run_id in task_run_ids]
        for task_run in task_runs:
            assignments += task_run.get_assignments()

        assignments += [
            Assignment(db, assignment_id) for assignment_id in assignment_ids
        ]

        if len(statuses) == 0:
            statuses = [
                AssignmentState.COMPLETED,
                AssignmentState.ACCEPTED,
                AssignmentState.REJECTED,
            ]

        filtered_assignments = [a for a in assignments if a.get_status() in statuses]

        # Only assignments that passed the status filter contribute units.
        for assignment in filtered_assignments:
            units += assignment.get_units()

        # Explicitly requested units are included regardless of status.
        units += [Unit(db, unit_id) for unit_id in unit_ids]

        all_unit_data = []
        for unit in units:
            unit_data = {
                "assignment_id": unit.assignment_id,
                "task_run_id": unit.task_run_id,
                "status": unit.db_status,
                "unit_id": unit.db_id,
                "worker_id": unit.worker_id,
                "data": None,
            }
            agent = unit.get_assigned_agent()
            if agent is not None:
                # Prefer the agent's view of the worker and attach its data.
                unit_data["data"] = agent.state.get_data()
                unit_data["worker_id"] = agent.worker_id
            all_unit_data.append(unit_data)

        print(all_unit_data)
        return jsonify({"success": True, "units": all_unit_data})
    except Exception as e:
        import traceback

        traceback.print_exc()
        return jsonify({"success": False, "msg": str(e)})
Пример #6
0
    def test_unit_fails(self) -> None:
        """Check that invalid unit loads and creations raise the expected errors"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Loading a unit id that was never created must fail
        with self.assertRaises(EntryDoesNotExistException):
            Unit(db, self.get_fake_id("Unit"))

        assignment = Assignment(db, get_test_assignment(db))
        unit_index = 0
        pay_amount = 15.0
        provider_type = PROVIDER_TYPE

        def create_unit(for_assignment_id: str) -> str:
            """Create a unit with the shared test values on the given assignment id"""
            return db.new_unit(
                assignment.task_id,
                assignment.task_run_id,
                assignment.requester_id,
                for_assignment_id,
                unit_index,
                pay_amount,
                provider_type,
                assignment.sandbox,
            )

        # An assignment id that does not exist is rejected
        with self.assertRaises(EntryDoesNotExistException):
            create_unit(self.get_fake_id("Assignment"))

        # A valid creation succeeds...
        create_unit(assignment.db_id)

        # ...but repeating it with the same values is rejected
        with self.assertRaises(EntryAlreadyExistsException):
            create_unit(assignment.db_id)

        # Only the single valid unit should exist; failed attempts add nothing
        self.assertEqual(len(db.find_units()), 1)
Пример #7
0
    def test_unit(self) -> None:
        """Create a unit, then verify row lookup, Unit loading and find_units"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Set up an assignment to attach the unit to
        assignment_id = get_test_assignment(db)
        assignment = Assignment(db, assignment_id)
        unit_index = 0
        pay_amount = 15.0
        provider_type = PROVIDER_TYPE

        unit_id = db.new_unit(
            assignment.task_id,
            assignment.task_run_id,
            assignment.requester_id,
            assignment.db_id,
            unit_index,
            pay_amount,
            provider_type,
            assignment.sandbox,
        )
        self.assertIsNotNone(unit_id)
        self.assertTrue(isinstance(unit_id, str))

        # The raw row reflects the values the unit was created with
        stored_row = db.get_unit(unit_id)
        self.assertEqual(stored_row["assignment_id"], assignment_id)
        self.assertEqual(stored_row["pay_amount"], pay_amount)
        self.assertEqual(stored_row["status"], AssignmentState.CREATED)

        loaded_unit = Unit(db, unit_id)
        self.assertEqual(loaded_unit.assignment_id, assignment_id)

        def expect_only_created_unit(found) -> None:
            """Assert that found holds exactly the unit created above"""
            self.assertEqual(len(found), 1)
            self.assertTrue(isinstance(found[0], Unit))
            self.assertEqual(found[0].db_id, unit_id)
            self.assertEqual(found[0].assignment_id, assignment_id)
            self.assertEqual(found[0].pay_amount, pay_amount)

        # Unfiltered search, then search narrowed to the assignment
        expect_only_created_unit(db.find_units())
        expect_only_created_unit(db.find_units(assignment_id=assignment_id))

        # An unknown assignment id matches nothing
        unmatched = db.find_units(assignment_id=self.get_fake_id("Assignment"))
        self.assertEqual(len(unmatched), 0)
Пример #8
0
def get_test_unit(db: MephistoDB, unit_index=0) -> str:
    """
    Create a fresh test assignment and register one unit on it.

    Args:
        db: database in which the assignment and unit are created.
        unit_index: index of the unit within its assignment (default 0).

    Returns:
        The value returned by db.new_unit for the created unit
        (annotated as a str id).
    """
    assignment_id = get_test_assignment(db)
    pay_amount = 15.0
    assignment = Assignment(db, assignment_id)
    return db.new_unit(
        assignment.task_id,
        assignment.task_run_id,
        assignment.requester_id,
        assignment.db_id,
        unit_index,  # honour the caller's index instead of a fixed 0
        pay_amount,
        assignment.provider_type,
        assignment.task_type,
    )
    def test_assignment(self) -> None:
        """Create an assignment, then verify row lookup, loading and find_assignments"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        task_run_id = get_test_task_run(db)
        task_run = TaskRun(db, task_run_id)

        # Create the assignment under the test task run
        assignment_id = db.new_assignment(
            task_run.task_id,
            task_run_id,
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )
        self.assertIsNotNone(assignment_id)
        self.assertTrue(isinstance(assignment_id, str))

        # Both the raw row and the loaded object point back at the task run
        stored_row = db.get_assignment(assignment_id)
        self.assertEqual(stored_row["task_run_id"], task_run_id)
        loaded = Assignment(db, assignment_id)
        self.assertEqual(loaded.task_run_id, task_run_id)

        def expect_only_created_assignment(found) -> None:
            """Assert that found holds exactly the assignment created above"""
            self.assertEqual(len(found), 1)
            self.assertTrue(isinstance(found[0], Assignment))
            self.assertEqual(found[0].db_id, assignment_id)
            self.assertEqual(found[0].task_run_id, task_run_id)

        # Unfiltered search, then search narrowed to the task run
        expect_only_created_assignment(db.find_assignments())
        expect_only_created_assignment(db.find_assignments(task_run_id=task_run_id))

        # A fake id used as the task run filter matches nothing
        unmatched = db.find_assignments(
            task_run_id=self.get_fake_id("Assignment"))
        self.assertEqual(len(unmatched), 0)
Пример #10
0
 def find_assignments(
     self,
     task_run_id: Optional[str] = None,
     task_id: Optional[str] = None,
     requester_id: Optional[str] = None,
     task_type: Optional[str] = None,
     provider_type: Optional[str] = None,
     sandbox: Optional[bool] = None,
 ) -> List[Assignment]:
     """
     Return every assignment that matches all of the given filters.

     A filter left as None is not applied, so calling with no arguments
     returns all assignments. The query runs while holding
     self.table_access_condition.
     """
     with self.table_access_condition:
         cursor = self._get_connection().cursor()
         cursor.execute(
             """
                 SELECT * from assignments
                 WHERE (?1 IS NULL OR task_run_id = ?1)
                 AND (?2 IS NULL OR task_id = ?2)
                 AND (?3 IS NULL OR requester_id = ?3)
                 AND (?4 IS NULL OR task_type = ?4)
                 AND (?5 IS NULL OR provider_type = ?5)
                 AND (?6 IS NULL OR sandbox = ?6)
             """,
             (
                 nonesafe_int(task_run_id),
                 nonesafe_int(task_id),
                 nonesafe_int(requester_id),
                 task_type,
                 provider_type,
                 sandbox,
             ),
         )
         # Wrap each row, handing the row over so Assignment receives it directly
         found: List[Assignment] = []
         for record in cursor.fetchall():
             found.append(
                 Assignment(self, str(record["assignment_id"]), row=record))
         return found
Пример #11
0
 def launch_evaluation_unit(self, unit_data: Dict[str, Any],
                            unit_type_index: int) -> "Unit":
     """
     Create and launch a single evaluation unit, used for quality control.

     A new assignment is registered for self.task_run, initialised with
     unit_data, and appended to self.assignments. A unit is then created on
     it through self.UnitClass.new, launched against self.launch_url, and
     returned. Requires self.launch_url to be set already.
     """
     assert (self.launch_url is not None
             ), "Cannot launch an evaluation unit before launching others"
     run = self.task_run
     run_args = run.get_task_args()
     new_assignment_id = self.db.new_assignment(
         run.task_id,
         run.db_id,
         run.requester_id,
         run.task_type,
         run.provider_type,
         run.sandbox,
     )
     init_data = InitializationData(unit_data, [{}])
     eval_assignment = Assignment.get(self.db, new_assignment_id)
     eval_assignment.write_assignment_data(init_data)
     self.assignments.append(eval_assignment)

     reward = run_args.task_reward
     evaluation_unit = self.UnitClass.new(
         self.db, eval_assignment, unit_type_index, reward)
     evaluation_unit.launch(self.launch_url)
     return evaluation_unit
Пример #12
0
class Agent(ABC):
    """
    This class encompasses a worker as they are working on an individual assignment.
    It holds the agent's database fields (status and related ids), the queues of
    pending observations and actions, and the threading events used to signal
    when an action is available, wanted, or submitted.

    Constructing ``Agent(db, db_id)`` directly returns an instance of the
    AgentClass of the crowd provider named in the agent's row (see __new__).
    """
    def __init__(self,
                 db: "MephistoDB",
                 db_id: str,
                 row: Optional[Mapping[str, Any]] = None):
        """
        Populate this agent from its database row.

        If row is not given it is fetched with db.get_agent(db_id). Related
        entities (worker, unit, assignment, task run, task) are not loaded
        here; they are resolved on first access by the get_* methods.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        self.db_id: str = row["agent_id"]
        self.db_status = row["status"]
        self.worker_id = row["worker_id"]
        self.unit_id = row["unit_id"]
        self.task_type = row["task_type"]
        self.provider_type = row["provider_type"]
        # Packets queued by observe() and packets consumed by act()
        self.pending_observations: List["Packet"] = []
        self.pending_actions: List["Packet"] = []
        # has_action: waited on by act(); also set on returned/disconnect status
        self.has_action = threading.Event()
        self.has_action.clear()
        # wants_action: set by act() when no action is pending
        self.wants_action = threading.Event()
        self.wants_action.clear()
        # has_updated_status: set whenever a status change is recorded or observed
        self.has_updated_status = threading.Event()
        self.assignment_id = row["assignment_id"]
        self.task_run_id = row["task_run_id"]
        self.task_id = row["task_id"]
        # did_submit: set when a submit action is consumed, or on returned/disconnect
        self.did_submit = threading.Event()

        # Deferred loading of related entities
        self._worker: Optional["Worker"] = None
        self._unit: Optional["Unit"] = None
        self._assignment: Optional["Assignment"] = None
        self._task_run: Optional["TaskRun"] = None
        self._task: Optional["Task"] = None

        # Follow-up initialization
        self.state = AgentState(self)  # type: ignore

    def __new__(cls,
                db: "MephistoDB",
                db_id: str,
                row: Optional[Mapping[str, Any]] = None) -> "Agent":
        """
        The new method is overridden to be able to automatically generate
        the expected Agent class without needing to specifically find it
        for a given db_id. As such it is impossible to create a base Agent
        as you will instead be returned the correct Agent class according to
        the crowdprovider associated with this Agent.

        When cls is a subclass, an instance of that subclass is created
        directly without consulting the database.
        """
        from mephisto.core.registry import get_crowd_provider_from_type

        if cls == Agent:
            # We are trying to construct a Agent, find what type to use and
            # create that instead
            if row is None:
                row = db.get_agent(db_id)
            assert row is not None, f"Given db_id {db_id} did not exist in given db"
            correct_class = get_crowd_provider_from_type(
                row["provider_type"]).AgentClass
            return super().__new__(correct_class)
        else:
            # We are constructing another instance directly
            return super().__new__(cls)

    def get_agent_id(self) -> str:
        """Return this agent's id"""
        return self.db_id

    def get_worker(self) -> Worker:
        """
        Return the worker that is using this agent for a task.

        The Worker is created from self.worker_id on first use and cached.
        """
        if self._worker is None:
            self._worker = Worker(self.db, self.worker_id)
        return self._worker

    def get_unit(self) -> "Unit":
        """
        Return the Unit that this agent is working on.

        The Unit is created from self.unit_id on first use and cached.
        """
        if self._unit is None:
            from mephisto.data_model.assignment import Unit

            self._unit = Unit(self.db, self.unit_id)
        return self._unit

    def get_assignment(self) -> "Assignment":
        """
        Return the assignment this agent is working on.

        Cached after the first call; taken from the already-loaded unit when
        there is one, otherwise built from self.assignment_id.
        """
        if self._assignment is None:
            if self._unit is not None:
                self._assignment = self._unit.get_assignment()
            else:
                from mephisto.data_model.assignment import Assignment

                self._assignment = Assignment(self.db, self.assignment_id)
        return self._assignment

    def get_task_run(self) -> "TaskRun":
        """
        Return the TaskRun this agent is working within.

        Cached after the first call; resolved through the already-loaded unit
        or assignment when available, otherwise built from self.task_run_id.
        """
        if self._task_run is None:
            if self._unit is not None:
                self._task_run = self._unit.get_task_run()
            elif self._assignment is not None:
                self._task_run = self._assignment.get_task_run()
            else:
                from mephisto.data_model.task import TaskRun

                self._task_run = TaskRun(self.db, self.task_run_id)
        return self._task_run

    def get_task(self) -> "Task":
        """
        Return the Task this agent is working within.

        Cached after the first call; resolved through the already-loaded unit,
        assignment or task run when available, otherwise built from
        self.task_id.
        """
        if self._task is None:
            if self._unit is not None:
                self._task = self._unit.get_task()
            elif self._assignment is not None:
                self._task = self._assignment.get_task()
            elif self._task_run is not None:
                self._task = self._task_run.get_task()
            else:
                from mephisto.data_model.task import Task

                self._task = Task(self.db, self.task_id)
        return self._task

    def get_data_dir(self) -> str:
        """
        Return the directory to be storing any agent state for
        this agent into: the assignment's data dir joined with this
        agent's db_id.
        """
        assignment_dir = self.get_assignment().get_data_dir()
        return os.path.join(assignment_dir, self.db_id)

    def update_status(self, new_status: str) -> None:
        """
        Update the database status of this agent.

        Does nothing when new_status equals the current status. Otherwise the
        status is written to the db and cached, and has_updated_status is set.
        For returned/disconnect statuses, has_action and did_submit are also
        set. Overwriting a status listed in AgentState.complete() is allowed
        but prints a notice first.
        """
        if self.db_status == new_status:
            return  # Noop, this is already the case
        if self.db_status in AgentState.complete():
            print(f"Updating a final status, was {self.db_status} "
                  f"and want to set to {new_status}")
        self.db.update_agent(self.db_id, status=new_status)
        self.db_status = new_status
        self.has_updated_status.set()
        if new_status in [
                AgentState.STATUS_RETURNED, AgentState.STATUS_DISCONNECT
        ]:
            # Disconnect statuses should free any pending acts
            self.has_action.set()
            self.did_submit.set()

    @staticmethod
    def _register_agent(db: "MephistoDB", worker: Worker, unit: "Unit",
                        provider_type: str) -> "Agent":
        """
        Create this agent in the mephisto db with the correct setup.

        A new agent row is created for the worker on the given unit, the
        Agent is loaded from it, and its status is updated to
        AgentState.STATUS_ACCEPTED before it is returned.
        """
        db_id = db.new_agent(
            worker.db_id,
            unit.db_id,
            unit.task_id,
            unit.task_run_id,
            unit.assignment_id,
            unit.task_type,
            provider_type,
        )
        a = Agent(db, db_id)
        a.update_status(AgentState.STATUS_ACCEPTED)
        return a

    # Specialized child cases may need to implement the following

    @classmethod
    def new_from_provider_data(
        cls,
        db: "MephistoDB",
        worker: Worker,
        unit: "Unit",
        provider_data: Dict[str, Any],
    ) -> "Agent":
        """
        Wrapper around the new method that allows registering additional
        bookkeeping information from a crowd provider for this agent.

        This base version does not read provider_data: it calls cls.new,
        sets unit.worker_id to the worker's db_id, and stores the unit on
        the agent so it does not need to be reloaded.
        """
        agent = cls.new(db, worker, unit)
        unit.worker_id = worker.db_id
        agent._unit = unit
        return agent

    def observe(self, packet: "Packet") -> None:
        """
        Pass the observed information to the AgentState, then
        queue the information to be pushed to the user.

        A copy of the packet is used, with its receiver_id set to this
        agent's db_id; the packet passed in is not the object that is queued.
        """
        sending_packet = packet.copy()
        sending_packet.receiver_id = self.db_id
        self.state.update_data(sending_packet)
        self.pending_observations.append(sending_packet)

    def act(self, timeout: Optional[int] = None) -> Optional["Packet"]:
        """
        Request information from the Agent's frontend. If non-blocking,
        (timeout is None or 0) returns None if no actions are ready
        to be returned. Otherwise waits up to timeout on has_action.

        Returns the oldest pending action, after recording it in the
        agent state.

        Raises:
            AgentDisconnectedError: no action arrived and the agent's status
                is STATUS_DISCONNECT.
            AgentReturnedError: no action arrived and the agent's status is
                STATUS_RETURNED.
            AgentTimeoutError: no action arrived for any other reason; the
                agent's status is updated to STATUS_TIMEOUT first.
        """
        if len(self.pending_actions) == 0:
            # Signal that an action is wanted before deciding whether to wait
            self.wants_action.set()
            if timeout is None or timeout == 0:
                return None
            self.has_action.wait(timeout)

        if len(self.pending_actions) == 0:
            # various disconnect cases
            status = self.get_status()
            if status == AgentState.STATUS_DISCONNECT:
                raise AgentDisconnectedError(self.db_id)
            elif status == AgentState.STATUS_RETURNED:
                raise AgentReturnedError(self.db_id)
            self.update_status(AgentState.STATUS_TIMEOUT)
            raise AgentTimeoutError(timeout, self.db_id)
        assert len(
            self.pending_actions) > 0, "has_action released without an action!"

        act = self.pending_actions.pop(0)

        if "MEPHISTO_is_submit" in act.data and act.data["MEPHISTO_is_submit"]:
            self.did_submit.set()

        # Reset the event once the queue is drained so the next act() can wait
        if len(self.pending_actions) == 0:
            self.has_action.clear()
        self.state.update_data(act)
        return act

    def get_status(self) -> str:
        """
        Get the status of this agent in their work on their unit.

        While the cached status is not in AgentState.complete(), the row is
        re-read from the db and the cache refreshed; a change sets
        has_updated_status (and has_action for returned/disconnect). Once the
        cached status is complete, it is returned without querying the db.
        """
        if self.db_status not in AgentState.complete():
            row = self.db.get_agent(self.db_id)
            if row["status"] != self.db_status:
                if row["status"] in [
                        AgentState.STATUS_RETURNED,
                        AgentState.STATUS_DISCONNECT,
                ]:
                    # Disconnect statuses should free any pending acts
                    self.has_action.set()
                self.has_updated_status.set()
            self.db_status = row["status"]
        return self.db_status

    # Children classes should implement the following methods

    def approve_work(self) -> None:
        """Approve the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def soft_reject_work(self) -> None:
        """
        Pay a worker for attempted work, but mark it as below the
        quality bar for this assignment.

        Implemented as approve_work() followed by a status update to
        AgentState.STATUS_SOFT_REJECTED.
        """
        # TODO(OWN) extend this method to assign a soft block
        # qualification automatically if a threshold of
        # soft rejects as a proportion of total accepts
        # is exceeded
        self.approve_work()
        self.update_status(AgentState.STATUS_SOFT_REJECTED)

    def reject_work(self, reason) -> None:
        """Reject the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def mark_done(self) -> None:
        """
        Take any required step with the crowd_provider to ensure that
        the worker can submit their work and be marked as complete via
        a call to get_status
        """
        raise NotImplementedError()

    @staticmethod
    def new(db: "MephistoDB", worker: Worker, unit: "Unit") -> "Agent":
        """
        Create an agent for this worker to be used for work on the given Unit.

        Implementation should return the result of _register_agent when sure the agent
        can be successfully created to have it put into the db.
        """
        raise NotImplementedError()
Пример #13
0
 def assignment_completed_successfully(self,
                                       assignment: Assignment) -> bool:
     """Report whether the assignment's current status is COMPLETED"""
     current_status = assignment.get_status()
     return current_status == AssignmentState.COMPLETED
Пример #14
0
class Unit(ABC):
    """
    This class tracks the status of an individual worker's contribution to a
    higher level assignment. It is the smallest 'unit' of work to complete
    the assignment, and this class is only responsible for checking
    the status of that work itself being done.

    It should be extended for usage with a specific crowd provider
    """
    def __init__(self,
                 db: "MephistoDB",
                 db_id: str,
                 row: Optional[Mapping[str, Any]] = None):
        """
        Populate this unit from its database row.

        If row is not given it is fetched with db.get_unit(db_id). Related
        entities are not loaded here; they are resolved lazily by the
        corresponding get_* methods.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_unit(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        # Plain copies of the row's columns; db_status is a cached value that
        # may go stale until refreshed from the db
        self.db_id: str = row["unit_id"]
        self.assignment_id = row["assignment_id"]
        self.unit_index = row["unit_index"]
        self.pay_amount = row["pay_amount"]
        self.agent_id = row["agent_id"]
        self.provider_type = row["provider_type"]
        self.db_status = row["status"]
        self.task_type = row["task_type"]
        self.task_id = row["task_id"]
        self.task_run_id = row["task_run_id"]
        self.sandbox = row["sandbox"]
        self.requester_id = row["requester_id"]
        self.worker_id = row["worker_id"]

        # Deferred loading of related entities
        # (double-underscore names are name-mangled to this class)
        self.__task: Optional["Task"] = None
        self.__task_run: Optional["TaskRun"] = None
        self.__assignment: Optional["Assignment"] = None
        self.__requester: Optional["Requester"] = None
        self.__agent: Optional["Agent"] = None
        self.__worker: Optional["Worker"] = None

    def __new__(cls,
                db: "MephistoDB",
                db_id: str,
                row: Optional[Mapping[str, Any]] = None) -> "Unit":
        """
        Allocate the provider-specific Unit subclass for the given db_id.

        Requesting a plain Unit never yields a base Unit: the unit's row
        (fetched from the db when not supplied) names a provider type, and
        the UnitClass of that crowd provider is instantiated instead.
        Requesting a subclass directly allocates that subclass unchanged.
        """
        if cls != Unit:
            # A concrete class was asked for explicitly; no lookup is needed
            return super().__new__(cls)

        from mephisto.operations.registry import get_crowd_provider_from_type

        if row is None:
            row = db.get_unit(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        provider = get_crowd_provider_from_type(row["provider_type"])
        return super().__new__(provider.UnitClass)

    def get_crowd_provider_class(self) -> Type["CrowdProvider"]:
        """Return the CrowdProvider class registered for this Unit's provider_type"""
        from mephisto.operations.registry import get_crowd_provider_from_type

        provider_class = get_crowd_provider_from_type(self.provider_type)
        return provider_class

    def get_assignment_data(self) -> Optional[Dict[str, Any]]:
        """Return the assignment data of the assignment this Unit belongs to"""
        parent_assignment = self.get_assignment()
        return parent_assignment.get_assignment_data()

    def sync_status(self) -> None:
        """
        Write the status reported by get_status() back as the db status, so
        the two are in agreement
        """
        # TODO(102) this will need to be run periodically/on crashes
        # to sync any lost state
        current_status = self.get_status()
        self.set_db_status(current_status)

    def get_db_status(self) -> str:
        """
        Return the status as currently stored in the database.

        When the cached status is one of AssignmentState.final_unit() it is
        returned without a query; otherwise the unit row is re-read and its
        status returned (the cached value is left untouched).
        """
        if self.db_status not in AssignmentState.final_unit():
            fresh_row = self.db.get_unit(self.db_id)
            assert fresh_row is not None, f"Unit {self.db_id} stopped existing in the db..."
            return fresh_row["status"]
        return self.db_status

    def set_db_status(self, status: str) -> None:
        """
        Record status as this Unit's status, both in the cached db_status
        attribute and in the database. The status must be one of
        AssignmentState.valid_unit().
        """
        allowed_statuses = AssignmentState.valid_unit()
        assert (
            status in allowed_statuses
        ), f"{status} not valid Assignment Status, not in {AssignmentState.valid_unit()}"
        # Update the local cache first, then persist
        self.db_status = status
        self.db.update_unit(self.db_id, status=status)

    def get_assignment(self) -> "Assignment":
        """
        Return the Assignment this Unit belongs to.

        The Assignment is built from self.assignment_id on the first call and
        cached on the instance for subsequent calls.
        """
        if self.__assignment is not None:
            return self.__assignment

        from mephisto.data_model.assignment import Assignment

        self.__assignment = Assignment(self.db, self.assignment_id)
        return self.__assignment

    def get_task_run(self) -> TaskRun:
        """
        Return the task run this Unit belongs to.

        Cached after the first call; obtained through the already-loaded
        assignment when there is one, otherwise built from self.task_run_id.
        """
        if self.__task_run is not None:
            return self.__task_run

        if self.__assignment is None:
            self.__task_run = TaskRun(self.db, self.task_run_id)
        else:
            self.__task_run = self.__assignment.get_task_run()
        return self.__task_run

    def get_task(self) -> Task:
        """
        Return the task this Unit belongs to.

        Cached after the first call; obtained through the already-loaded
        assignment or task run (in that order of preference) when available,
        otherwise built from self.task_id.
        """
        if self.__task is not None:
            return self.__task

        if self.__assignment is not None:
            loaded_task = self.__assignment.get_task()
        elif self.__task_run is not None:
            loaded_task = self.__task_run.get_task()
        else:
            loaded_task = Task(self.db, self.task_id)
        self.__task = loaded_task
        return self.__task

    def get_requester(self) -> "Requester":
        """
        Return the requester who offered this Unit.

        Cached after the first call; obtained through the already-loaded
        assignment or task run (in that order of preference) when available,
        otherwise built from self.requester_id.
        """
        if self.__requester is not None:
            return self.__requester

        if self.__assignment is not None:
            loaded_requester = self.__assignment.get_requester()
        elif self.__task_run is not None:
            loaded_requester = self.__task_run.get_requester()
        else:
            loaded_requester = Requester(self.db, self.requester_id)
        self.__requester = loaded_requester
        return self.__requester

    def clear_assigned_agent(self) -> None:
        """
        Remove the agent assignment for this unit in the database, then
        reset the locally held agent id and agent object to None
        """
        self.db.clear_unit_agent_assignment(self.db_id)
        self.agent_id = self.__agent = None

    def get_assigned_agent(self) -> Optional[Agent]:
        """
        Return the Agent assigned to this Unit, or None when there is none.

        A new Agent object is built on every call that finds an agent id.
        """
        # TODO(#97) add test to ensure this behavior/assumption holds always
        if self.db_status in AssignmentState.final_unit():
            # In final statuses the agent isn't changing anymore, so the id
            # loaded with this Unit can be used without re-querying
            current_agent_id = self.agent_id
        else:
            # Otherwise the assignment may have changed since instantiation,
            # so read the most up-to-date row
            # TODO(#101) this may not be particularly efficient
            fresh_row = self.db.get_unit(self.db_id)
            assert fresh_row is not None, f"Unit {self.db_id} stopped existing in the db..."
            current_agent_id = fresh_row["agent_id"]

        if current_agent_id is None:
            return None
        return Agent(self.db, current_agent_id)

    @staticmethod
    def _register_unit(
        db: "MephistoDB",
        assignment: "Assignment",
        index: int,
        pay_amount: float,
        provider_type: str,
    ) -> "Unit":
        """
        Insert a database row for a new unit of the given assignment and
        return a Unit wrapping the newly created id.

        The task, task run, requester and task type are copied from the
        assignment; index, pay_amount and provider_type come from the caller.
        """
        # NOTE(review): other new_unit call sites in this file pass a trailing
        # `sandbox` argument; this one relies on whatever new_unit does when
        # it is omitted - confirm that is intended.
        unit_fields = (
            assignment.task_id,
            assignment.task_run_id,
            assignment.requester_id,
            assignment.db_id,
            index,
            pay_amount,
            provider_type,
            assignment.task_type,
        )
        new_unit_id = db.new_unit(*unit_fields)
        return Unit(db, new_unit_id)

    def get_pay_amount(self) -> float:
        """
        Return what this Unit costs against the budget.

        This base implementation reports the stored pay_amount unchanged;
        any additional fees are left to overriding implementations.
        """
        budget_cost = self.pay_amount
        return budget_cost

    # Children classes may need to override the following

    def get_status(self) -> str:
        """
        Compute, persist and return the current status of this unit.

        With no assigned agent the status is whatever the db row records.
        Otherwise it is derived from the agent's status using the table
        below, falling back to LAUNCHED for any agent status not listed.
        When the result differs from the locally held db_status, it is
        written back via set_db_status.

        Accurate status is crowd-provider dependent, so child classes should
        override this to ensure the local record matches the ground truth
        in the provider. Should return one of UNIT_STATUSES.
        """
        from mephisto.abstractions.blueprint import AgentState

        previous_status = self.db_status

        agent = self.get_assigned_agent()
        if agent is None:
            computed_status = self.db.get_unit(self.db_id)["status"]
        else:
            # Checked top to bottom; the first group containing the agent's
            # status decides the unit status
            status_table = (
                ((AgentState.STATUS_NONE,), AssignmentState.LAUNCHED),
                (
                    (
                        AgentState.STATUS_ACCEPTED,
                        AgentState.STATUS_ONBOARDING,
                        AgentState.STATUS_PARTNER_DISCONNECT,
                        AgentState.STATUS_WAITING,
                        AgentState.STATUS_IN_TASK,
                    ),
                    AssignmentState.ASSIGNED,
                ),
                ((AgentState.STATUS_COMPLETED,), AssignmentState.COMPLETED),
                ((AgentState.STATUS_SOFT_REJECTED,), AssignmentState.SOFT_REJECTED),
                ((AgentState.STATUS_EXPIRED,), AssignmentState.EXPIRED),
                (
                    (AgentState.STATUS_DISCONNECT, AgentState.STATUS_RETURNED),
                    AssignmentState.ASSIGNED,
                ),
                ((AgentState.STATUS_APPROVED,), AssignmentState.ACCEPTED),
                ((AgentState.STATUS_REJECTED,), AssignmentState.REJECTED),
            )
            agent_status = agent.get_status()
            computed_status = AssignmentState.LAUNCHED
            for agent_statuses, unit_status in status_table:
                if agent_status in agent_statuses:
                    computed_status = unit_status
                    break

        # Only write to the db when the status actually changed
        if computed_status != previous_status:
            self.set_db_status(computed_status)

        return computed_status

    # Children classes should implement the below methods

    def launch(self, task_url: str) -> None:
        """
        Publish this Unit on the crowdsourcing vendor so it can be worked on.

        What that involves depends on the task type and may take several
        setup steps. Some crowd providers need a configuration created on
        the very first launch; implementations should call a helper to
        handle that step when it is required.

        Not implemented on the base class: always raises
        NotImplementedError.
        """
        raise NotImplementedError

    def expire(self) -> float:
        """
        Take this unit down so it can no longer be worked on at the vendor.

        Implementations return the maximum time that may need to pass before
        the unit is known to be taken down. The base class always raises
        NotImplementedError.
        """
        raise NotImplementedError

    def is_expired(self) -> bool:
        """
        Report whether the vendor considers this unit expired.

        The base class always raises NotImplementedError.
        """
        raise NotImplementedError

    @staticmethod
    def new(db: "MephistoDB", assignment: "Assignment", index: int,
            pay_amount: float) -> "Unit":
        """
        Create a Unit for the given assignment

        Implementation should return the result of _register_unit when sure the unit
        can be successfully created to have it put into the db.
        """
        raise NotImplementedError()
Пример #15
0
        "inputs": {"something": True, "something else": False},
        "outputs": {"some": "annotations"},
    }
]

# Write a new task, and then complete it
# (this excerpt only shows the assignment and unit being registered)
for annotation in test_annotations:
    # Register one assignment per annotation, copying its identifying
    # fields from the existing task run
    assignment_id = db.new_assignment(
        task_run.task_id,
        task_run.db_id,
        task_run.requester_id,
        task_run.task_type,
        task_run.provider_type,
        task_run.sandbox,
    )
    assignment = Assignment(db, assignment_id)
    # The annotation's "inputs" become the assignment's shared data; no
    # per-unit data is supplied
    assignment.write_assignment_data(
        InitializationData(unit_data={}, shared=annotation["inputs"])
    )

    # Register a single unit (index 0, zero reward) under that assignment
    unit_id = db.new_unit(
        task_run.task_id,
        task_run.db_id,
        task_run.requester_id,
        assignment_id,
        0,  # Unit_index
        0,  # reward
        task_run.provider_type,
        task_run.task_type,
        task_run.sandbox,
    )
Пример #16
0
def main():
    """
    Script to launch makeup tasks for workers that
    can't be bonused via other avenues.

    Creates a task for a worker, qualifying them directly,
    and marks as a soft_rejected HIT for the given task name.

    The script is interactive: it prompts for an existing task name, a
    requester name, and then any number of (worker id, amount, reason)
    entries. A blank amount or reason reuses the value from the previous
    entry. For every entry it grants the worker a dedicated qualification,
    creates a task run / assignment / unit, deploys a compensation HIT,
    emails the worker the link, and sets the unit's status to
    SOFT_REJECTED.

    Returns 0 when it exits early (unknown task, unknown requester, or no
    entries given).

    Raises:
        ValueError: if the amount is not a number, or if the amount or
            reason is left blank on the first entry.
    """
    db = LocalMephistoDB()

    task_name = input(
        "Please enter a task name for bookkeeping. This task name will be tied to "
        "the additional spend granted through this script, and should be the same "
        "as the task you originally launched that you now need to compensate for:\n>> "
    )
    tasks = db.find_tasks(task_name=task_name)
    if not tasks:
        print("No tasks found with the given name...")
        all_names = {t.task_name for t in db.find_tasks()}
        print(
            f"Choose an existing task of {all_names} to use this functionality."
        )
        print("Compensation hits must be tied to an existing task")
        return 0
    task = tasks[0]

    req_name = input(
        "Please enter an MTurkRequester name to use to bonus from:\n>> ")
    requesters = db.find_requesters(requester_name=req_name)
    if not requesters:
        print("Could not find a requester by that name...")
        return 0
    requester = requesters[0]
    client = requester._get_client(requester._requester_name)

    print(
        "You can now enter a worker id, amount, and reason for as many compensation tasks "
        "as you want to launch for this.")
    compensation_hits = []
    # amount and reason persist across iterations so that a blank answer can
    # fall back to the previously entered value
    amount = None
    reason = None
    while True:
        worker_id = input(
            "Enter a worker id to compensate. Leave blank to move on to launching: \n>> "
        ).strip()
        if not worker_id:
            break

        prev_amount = "" if amount is None else f" (leave blank for ${amount})"
        next_amount = input(
            f"Enter the amount in dollars to pay out in this compensation task{prev_amount}:\n>> $"
        )
        if next_amount.strip():
            amount = float(next_amount)
        # Explicit raise instead of assert so the check survives `python -O`
        if amount is None:
            raise ValueError("Amount can not be left blank")

        prev_reason = "" if reason is None else f" (leave blank for '{reason}')"
        next_reason = input(
            f"Provide reason for launching this compensation task. This will be sent to the worker{prev_reason}:\n>> "
        )
        if next_reason.strip():
            reason = next_reason
        if reason is None:
            raise ValueError("Reason can not be left blank")

        compensation_hits.append({
            "worker_id": worker_id,
            "amount": amount,
            "reason": reason,
        })
    if not compensation_hits:
        print("No compensation details provided, exiting")
        return 0

    print(f"You entered the following tasks:\n{compensation_hits}")
    input("Input anything to confirm and continue...")

    # Iterate through and launch tasks
    for comp_dict in compensation_hits:
        # Create the MTurk qualification for this specific worker
        worker_id = comp_dict["worker_id"]
        qual_name = f"compensation-for-{worker_id}-on-{task_name}"
        print(f"Creating qualification for {worker_id}: {qual_name}....")
        qualification = make_qualification_dict(qual_name, QUAL_EXISTS, None)
        # Reuse the qualification if the datastore already has a mapping for
        # this name; otherwise create it
        qual_map = requester.datastore.get_qualification_mapping(qual_name)
        if qual_map is None:
            qualification_type_id = requester._create_new_mturk_qualification(
                qual_name)
        else:
            qualification_type_id = qual_map["mturk_qualification_id"]
        qualification["QualificationTypeId"] = qualification_type_id
        give_worker_qualification(client, worker_id,
                                  qualification["QualificationTypeId"])

        # Create the task run for this HIT
        print("Creating task run and data model components for this HIT")
        config = build_task_config(comp_dict, requester)
        init_params = OmegaConf.to_yaml(OmegaConf.structured(config))
        new_run_id = db.new_task_run(
            task.db_id,
            requester.db_id,
            json.dumps(init_params),
            requester.provider_type,
            "mock",
            requester.is_sandbox(),
        )
        task_run = TaskRun.get(db, new_run_id)

        # Create an assignment, unit, agent, and mark as assigned
        # Assignment creation
        task_args = task_run.get_task_args()
        assignment_id = db.new_assignment(
            task_run.task_id,
            task_run.db_id,
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )
        data = InitializationData({}, [{}])
        assignment = Assignment.get(db, assignment_id)
        assignment.write_assignment_data(data)

        # Unit creation
        unit_id = db.new_unit(
            task_run.task_id,
            task_run.db_id,
            task_run.requester_id,
            assignment_id,
            COMPENSATION_UNIT_INDEX,
            task_args.task_reward,
            task_run.provider_type,
            task_run.task_type,
            task_run.sandbox,
        )
        compensation_unit = Unit.get(db, unit_id)
        print(f"Created {task_run}, {assignment}, and {compensation_unit}...")

        # Set up HIT type
        hit_type_id = create_hit_type(
            client,
            task_run.get_task_args(),
            [qualification],
            auto_approve_delay=30,
            skip_locale_qual=True,
        )

        # Create the task on MTurk, email the worker
        print("Creating and deploying task on MTurk")
        duration = 60 * 60 * 24  # presumably seconds, i.e. 24 hours - TODO confirm
        hit_link, hit_id, _ = create_compensation_hit_with_hit_type(
            client, comp_dict["reason"], hit_type_id)
        requester.datastore.new_hit(hit_id, hit_link, duration, task_run.db_id)

        print("Sending email to worker...")
        # Use this entry's own reason: the `reason` variable left over from
        # the input loop only holds the last reason that was entered
        result = email_worker(
            client,
            worker_id,
            "Compensation HIT Launched",
            ("Hello Worker,\n We've launched a compensation hit for a task that you've worked on "
             f"for us in the past. The reason supplied for this task was: {comp_dict['reason']}. This task is "
             f"only doable by you, and should reward ${comp_dict['amount']}. Thanks for being a valued "
             "contributor to our tasks, and for allowing us to try and resolve the issue.\n\n"
             f"Your task can be accessed at the following link: {hit_link}."),
        )

        if not result[0]:
            print(
                f"Email send failed, for reason {result[1]}\n"
                f"Please send {hit_link} to {worker_id} yourself if they reached out about this issue."
            )

        # Mark the agent as soft_rejected, such that we've "paid" it
        compensation_unit.set_db_status(AssignmentState.SOFT_REJECTED)