Exemplo n.º 1
0
    def post_design(
        self,
        design: dict,
        allow_duplicates: bool = False,
    ):
        """Submit a new design to the DESIGN module.

        Args:
            design (dict): The design definition, sent as the `designJson` field of the request body. The
                structure is complex; the original documentation recommends generating it with the
                `build_design_from_candidates` helper from *utils*.

            allow_duplicates (bool): Sent as the `allowDuplicates` field. Set to True to avoid errors being
                raised when duplicated parts are detected. Defaults to False.

        Returns:
            dict: The JSON-decoded response content. On success it contains the id of the new design
                (for example `{'id': 5}`).
        """
        payload = dict(designJson=design, allowDuplicates=allow_duplicates)
        reply = post(url=self.post_designs_url, headers=self.headers, json=payload)
        return json.loads(reply['content'])
Exemplo n.º 2
0
    def get_model_info(
        self,
        model_id: ModelID,
    ):
        """Fetch the general information (metadata) of a single model.

        The model id is stringified and posted as `{'id': ...}` to `self.get_model_url`. The response content is
        JSON-decoded and then handed to `self._get_data_from_content`, whose result is returned.

        Args:
            model_id (ModelID): Identifier of the model to look up.

        Returns:
            Whatever `self._get_data_from_content` extracts from the decoded response. According to the original
            documentation this is a dict with the model info, such as:

        ```json
        {
            "id": "0",
            "labId": "1",
            "modelType": "predictive",
            "name": "My First Predictive Model",
            "description": "This is an example model",
            "status": "completed-successfully",
            "evolveModelInfo": {
                "microserviceQueueId": "1",
                "dataSchema": [{
                    "id": "1",
                    "name": "Descriptor1",
                    "value_type": "numeric",
                    "type": "descriptor"
                }, {
                    "id": "1",
                    "name": "Descriptor2",
                    "value_type": "numeric",
                    "type": "descriptor"
                }, {
                    "id": "2",
                    "name": "Target",
                    "value_type": "numeric",
                    "type": "target"
                }],
                "modelStats": {
                    "MAE": 45
                }
            }
        }
        ```
        """
        request_body = {'id': str(model_id)}
        reply = post(url=self.get_model_url, headers=self.headers, json=request_body)
        decoded = json.loads(reply['content'])

        # The helper validates/unwraps the decoded payload before it is returned.
        return self._get_data_from_content(decoded)
Exemplo n.º 3
0
    def post_codon_optimization_job(
        self,
        algorithm='ALGORITHMS_NAME',
        parameters=None,
    ):
        """Submit a codon optimization job.

        Args:
            algorithm: Value sent as the `algorithm` field of the request body. The default is the literal
                string 'ALGORITHMS_NAME' (NOTE(review): looks like a placeholder - confirm the valid names).

            parameters: Value sent as the `parameters` field of the request body. When None, an empty dict is
                sent instead.

        Returns:
            The JSON-decoded content of the response.
        """
        if parameters is None:
            parameters = {}

        payload = {'algorithm': algorithm, 'parameters': parameters}
        reply = post(url=self.post_codon_op, headers=self.headers, json=payload)
        return json.loads(reply['content'])
Exemplo n.º 4
0
    def cancel_task(
        self,
        task_id: TaskID,
    ) -> Any:
        """Cancel the submitted task identified by `task_id`.

        The task id is interpolated into `self.cancel_task_url` with `str.format`, and a POST request without a
        body is sent to the resulting URL.

        Args:
            task_id (TaskID): Id of the task to cancel.

        Returns:
            Any: The JSON-decoded content of the response.
        """
        target_url = self.cancel_task_url.format(task_id)
        reply = post(url=target_url, headers=self.headers)
        return json.loads(reply['content'])
Exemplo n.º 5
0
    def register(
        self,
        username: str,
        password: str,
    ):
        """Register a new user.

        NB: Registering a new user might require ADMIN privileges.

        The request is posted to `self.register_url` without any headers. The first and last name are
        hard-coded to 'test' and 'user'.

        Args:
            username (str): Sent as the `email` field of the registration body.
            password (str): Sent as both the `password` and `passwordConfirm` fields.

        Returns:
            The response object returned by `post`, with its 'content' entry replaced by the JSON-decoded
            content.
        """
        registration = dict(
            email=username,
            firstName='test',
            lastName='user',
            password=password,
            passwordConfirm=password,
        )
        reply = post(url=self.register_url, json=registration)
        # Unlike most sibling methods, the whole response is returned, so decode the content in place.
        reply['content'] = json.loads(reply['content'])
        return reply
Exemplo n.º 6
0
    def cancel_model(
        self,
        model_id: ModelID,
    ):
        """Cancel the submitted model identified by `model_id`.

        The model id is stringified and posted as `{'id': ...}` to `self.cancel_model_url`.

        Args:
            model_id (ModelID): Id of the model to cancel.

        Returns:
            Whatever `self._get_data_from_content` extracts from the JSON-decoded response content.
        """
        request_body = {'id': str(model_id)}
        reply = post(url=self.cancel_model_url, headers=self.headers, json=request_body)
        decoded = json.loads(reply['content'])
        return self._get_data_from_content(decoded)
Exemplo n.º 7
0
    def submit_prediction_task(
        self,
        data_input: List[Dict[str, Any]],
        data_schema: List[Dict[str, Any]],
        model_id: ModelID,
    ) -> Dict[str, Any]:
        """Submit a task that runs predictions on a list of datapoints with a pre-trained Predictive Model.

        The request is posted to `self.submit_model_url` with the model type fixed to 'predictive' and the name
        fixed to 'pretrained'. No `configs` or `description` fields are sent.

        Args:
            data_input (List[Dict[str, Any]]): Datapoints, in the same format described for `submit_model`.
            data_schema (List[Dict[str, Any]]): Data schema, in the same format described for `submit_model`.
            model_id (ModelID): Id of the pre-trained predictive model used to run the predictions.

        Returns:
            Dict[str, Any]: The JSON-decoded response content (task metadata, including its id for later
                retrieval, according to the original documentation). Its `data` entry is extended with a
                `pretrainedModelId` key holding `model_id`, so the response must contain a `data` mapping.
        """
        payload = {
            'dataInput': data_input,
            'dataSchema': data_schema,
            'modelType': 'predictive',
            'predictiveModelId': model_id,
            'name': 'pretrained',
        }
        reply = post(url=self.submit_model_url, headers=self.headers, json=payload)

        task_info: Dict[str, Any] = json.loads(reply['content'])

        # Attach the id of the model that was used, so callers get it back with the task metadata.
        task_info['data'].update(pretrainedModelId=model_id)
        return task_info
Exemplo n.º 8
0
    def import_aa_sequences(
        self,
        aa_sequences: Union[pd.DataFrame, List[List[str]],
                            List[Tuple[str, str]], List[Dict[str, str]]],
        tags: Optional[List[int]] = None,
    ):
        """Import one or many amino acid sequences by means of TeselaGen's DESIGN API.

        Args:
            aa_sequences (Union[pd.DataFrame, List[List[str]], List[Tuple[str, str]], List[Dict[str, str]]]):
                Amino acid sequences data. Supported layouts:
                - a pandas dataframe, where the first column holds the sequence names and the second column
                  holds the amino acid sequence strings.
                - a list of 2-element lists or tuples, where the first element is the sequence name and the
                  second element is the sequence string.
                - a list of dictionaries of the form
                  `{"AA_NAME": SEQUENCE_NAME, "AA_SEQUENCE": SEQUENCE_STRING}`.

            tags (Optional[List[int]]): Integer tag IDs used to tag every imported sequence. Each ID is sent as
                `{'id': ID}`. Any value that is not a list is ignored.
                (NOTE: tags cannot be created on-the-fly through this function, it only accepts tag IDs that
                are already created in the DESIGN Module).

        Returns:
            A dict with up to two keys, each present only when the API reported a non-empty list for it:
                - createdAminoAcidSequences: 'id' and 'name' of the created amino acid sequences.
                - existingAminoAcidSequences: 'id' of the updated amino acid sequences.

            If the request itself raises, the exception object is returned instead of being raised.

        Raises:
            ValueError: If `aa_sequences` is None, has an unsupported type, or is a list whose elements are
                neither all 2-element lists/tuples nor all dictionaries.
        """
        if aa_sequences is None:
            raise ValueError("The 'aa_sequences' argument is mandatory.")

        if isinstance(aa_sequences, pd.DataFrame):
            names = aa_sequences.iloc[:, 0].values.tolist()
            contents = aa_sequences.iloc[:, 1].values.tolist()

        elif isinstance(aa_sequences, list):
            # Lists and tuples share one branch. Previously the tuple check started a new `if` chain, so a
            # list of 2-element lists was accepted first and then rejected by the trailing `else`.
            if all(isinstance(x, (list, tuple)) and len(x) == 2 for x in aa_sequences):
                names = [x[0] for x in aa_sequences]
                contents = [x[1] for x in aa_sequences]

            elif all(isinstance(x, dict) for x in aa_sequences):
                names = [x['AA_NAME'] for x in aa_sequences]
                contents = [x['AA_SEQUENCE'] for x in aa_sequences]

            else:
                raise ValueError(
                    "All elements in list argument 'aa_sequences' must either be 2-element lists/tuples or "
                    "properly formatted dictionaries according to the function's Args description."
                )
        else:
            raise ValueError(
                f"Type {type(aa_sequences)} for argument 'aa_sequences' is not supported."
            )

        params = {'name': names, 'contents': contents}

        if isinstance(tags, list):
            params['tags'] = [{'id': tag_id} for tag_id in tags]

        try:
            result = post(url=self.import_aa_url,
                          data=json.dumps(params),
                          headers=self.headers)
        except Exception as e:
            # NOTE(review): returning the exception (instead of raising it) is kept for backward
            # compatibility with existing callers; consider raising in a future major version.
            return e

        parsed_api_result = json.loads(result['content'])

        formatted_response = {}

        created = parsed_api_result.get('createdAminoAcidSequences')
        if created:
            formatted_response['createdAminoAcidSequences'] = [
                {'id': seq['id'], 'name': seq['name']} for seq in created
            ]

        updated = parsed_api_result.get('existingAminoAcidSequences')
        if updated:
            formatted_response['existingAminoAcidSequences'] = [
                {'id': seq['id']} for seq in updated
            ]

        return formatted_response
Exemplo n.º 9
0
    def rbs_calculator_submit_job(
        self,
        algorithm: str,
        params: Dict[str, Any],
    ) -> dict:
        """Submit a job to the RBS Calculator API (version v2.1).

        Background material on the RBS Calculator tools:

        - Paper: https://www.researchgate.net/publication/51155303_The_Ribosome_Binding_Site_Calculator.
        - Browser Application: https://salislab.net/software/
        - Swagger API Documentation: https://app.swaggerhub.com/apis-docs/DeNovoDNA/JobControl/1.0.1

        The TeselaGen/RBS integration currently supports the three tools below. Parameter names are spelled as
        in the example payloads further down.

        - "ReverseRBS": Runs the RBS Calculator in Reverse Engineering mode, predicting the translation
            initiation rate of each start codon in a mRNA sequence.
            ([Predict Translation Rates](https://salislab.net/software/predict_rbs_calculator))

            parameters:
                mRNA (str): Valid 'GATCU' mRNA sequence.
                long_UTR (boolean): Enables long UTRs.
                organism (str): Valid organism name (call the 'rbs_calculator_organisms' function for all
                    available organism names).

        - "RBSLibraryCalculator_SearchMode": Runs the RBS Library Calculator in Search mode, designing a
            ribosome binding site library that maximally covers a selected translation rate space between a
            targeted minimum and maximum rate using the fewest number of RBS variants.
            ([Optimize Expression Levels](https://salislab.net/software/design_rbs_library_calculator))

            parameters:
                CDS (str): Valid 'GATCU' coding sequence.
                RBS_Constraints (str): Either an empty string or a valid degenerate nucleotide sequence
                    ('GATCURYSWKMBDHVN').
                initial_RBS_sequence (str): Either an empty string or a valid 'GATCU' RBS sequence. It
                    initializes the RBS sequence exploration algorithm; with an empty string, a random RBS
                    sequence is used as the initializing sequence.
                library_size (int): Number of RBS sequences in your library.
                maximum_consecutive_degeneracy (int): The maximum number of consecutive degeneracy nucleotides
                    for the RBS library designs.
                minimum_translation_initiation_rate (int): Lowest translation rate desired for your RBS library
                    (proportional scale varies from 1 to 1,000,000).
                maximum_translation_initiation_rate (int): Highest translation rate desired for your RBS library
                    (proportional scale varies from 1 to 1,000,000).
                organism (str): Valid organism name (call the 'rbs_calculator_organisms' function for all
                    available organism names).
                pre_sequence (str): Either an empty string or a valid 'GATCU' mRNA sequence that is required to
                    appear upstream (5') of the RBS sequence.

        - "RBSLibraryCalculator_GenomeSearchMode": Runs the RBS Library Calculator in Genome Editing mode,
            designing a genomic ribosome binding site library that maximally covers a selected translation
            rate space between a targeted minimum and maximum rate, while introducing the fewest number of
            consecutive genomic mutations.
            ([Optimize Expression Levels](https://salislab.net/software/design_rbs_library_calculator))

            parameters:
                CDS (str): Valid 'GATCU' coding sequence.
                RBS_Constraints (str): Either an empty string or a valid degenerate nucleotide sequence
                    ('GATCURYSWKMBDHVN').
                genomic_RBS_sequence (str): Genomic RBS sequence. Must be a valid 'GATCU' sequence.
                initial_RBS_sequence (str): Either an empty string or a valid 'GATCU' RBS sequence. It
                    initializes the RBS sequence exploration algorithm; with an empty string, a random RBS
                    sequence is used as the initializing sequence.
                library_size (int): Number of RBS sequences in your library.
                maximum_consecutive_degeneracy (int): The maximum number of consecutive degeneracy nucleotides
                    for the RBS library designs.
                minimum_translation_initiation_rate (int): Lowest translation rate desired for your RBS library
                    (proportional scale varies from 1 to 1,000,000).
                maximum_translation_initiation_rate (int): Highest translation rate desired for your RBS library
                    (proportional scale varies from 1 to 1,000,000).
                organism (str): Valid organism name (call the 'rbs_calculator_organisms' function for all
                    available organism names).
                pre_sequence (str): Either an empty string or a valid 'GATCU' mRNA sequence that is required to
                    appear upstream (5') of the RBS sequence.

        Args:
            algorithm (str): One of the three tool names listed above. It is added to the request payload under
                the 'algorithm' key and overrides any 'algorithm' entry already present in `params`.
            params (dict): Parameters required by the chosen tool, following the RBS Calculator API Swagger
                specification mentioned above. For the meaning of each parameter refer to the
                https://salislab.net/software/ browser application.

                Example inputs:

                'ReverseRBS' params:
                    {
                        "mRNA": "YOUR_mRNA_ SEQUENCE",
                        "long_UTR": false,
                        "organism": "Acetobacter pomorum"
                    }

                'RBSLibraryCalculator_SearchMode' params:
                    {
                        "CDS": "YOUR_CDS_SEQUENCE",
                        "RBS_Constraints": 'TCTAGANNNNNNNNNNNNNNNNNNNNNNNNNGAATTC',
                        "initial_RBS_sequence": "GATTGCGTGTGAGTTCTGGCACGGAGGAGCACGTA",
                        "library_size": 16,
                        "maximum_consecutive_degeneracy": 6,
                        "maximum_translation_initiation_rate": 100,
                        "minimum_translation_initiation_rate": 10,
                        "organism": "Escherichia coli str. K-12 substr. MG1655",
                        "pre_sequence": ""
                    }

                'RBSLibraryCalculator_GenomeSearchMode' params:
                    {
                        "CDS": "YOUR_CDS_SEQUENCE",
                        "RBS_Constraints": "",
                        "genomic_RBS_sequence": "CUCGUACGGUGCUAACGUGCUUAGU",
                        "initial_RBS_sequence": "",
                        "library_size": 16,
                        "maximum_consecutive_degeneracy": 6,
                        "maximum_translation_initiation_rate": 100,
                        "minimum_translation_initiation_rate": 10,
                        "organism": "Escherichia coli str. K-12 substr. MG1655",
                        "pre_sequence": ""
                    }

        Returns:
            dict: The JSON-decoded RBS Calculator job response, which may depend on the chosen tool. If the
                request raises, `{'error': <exception>}` is returned instead.
        """
        # Serialize up-front: a non-serializable `params` raises here, outside the guarded request.
        serialized: str = json.dumps({**params, 'algorithm': algorithm})

        try:
            reply = post(url=self.rbs_calculator_submit_url,
                         data=serialized,
                         headers=self.headers)
        except Exception as e:
            return {'error': e}

        return json.loads(reply['content'])
Exemplo n.º 10
0
    def design_crispr_grnas(
        self,
        sequence: str,
        target_indexes: Optional[Tuple[int, int]] = None,
        target_sequence: Optional[str] = None,
        pam_site: str = 'NGG',
        min_score: float = 40.0,
        max_number: Optional[int] = 50,
        wait_for_results: bool = True,
    ) -> Dict[str, Any]:
        """Design CRISPR guide RNAs for a target within a genome sequence.

        Args:
            sequence (str): The genome sequence. The whole genome sequence is needed for more accurate on/off
                target score predictions.

            target_indexes (Optional[Tuple[int, int]], optional): Start and end position (indexed from 0) of
                the target sequence relative to the genome sequence. Defaults to None, in which case no
                start/end positions are sent and `target_sequence` is expected to be used instead.

            target_sequence (Optional[str], optional): Sequence of the target. Defaults to None, in which case
                it is not sent and `target_indexes` is expected to be used instead.

            pam_site (str, optional): PAM site of your CRISPR enzyme. Supported CRISPR enzymes: SpyoCas9
                ('NGG'), SaurCas9 ('NNGRR'), AsCas12a ('TTTV'). Defaults to 'NGG' (SpyoCas9).

            min_score (float, optional): Minimum on-target score desired for the designed guide RNAs.
                Defaults to 40.0.

            max_number (Optional[int], optional): Maximum number of guide RNAs expected in the response. When
                None, no limit is sent. Defaults to 50.

            wait_for_results (bool, optional): If `True`, and the submission response carries a `taskId`, the
                method waits for the task to complete and returns the full output. If `False`, the submit
                confirmation object is returned without waiting. Defaults to `True`.

        Returns:
            dict: When results are awaited, the `data` entry of the completed task: `guides`, a list of
                dictionaries with guide info (`sequence`, `start`, `end`, `onTargetScore` and
                `offTargetScore`), and `target_indexes`, a list with the target start and end indexes within
                the main sequence. Otherwise the decoded submission response, a dict with `taskID` (the id of
                the submitted task) and a `message` string.
        """
        data: Dict[str, Any] = {'sequence': sequence}
        if target_indexes is not None:
            data['targetStart'] = target_indexes[0]
            data['targetEnd'] = target_indexes[1]
        if target_sequence is not None:
            data['targetSequence'] = target_sequence

        options: Dict[str, Any] = {'pamSite': pam_site, 'minScore': min_score}
        if max_number is not None:
            options['maxNumber'] = max_number

        payload: Dict[str, Any] = {'data': data, 'options': options}
        reply = post(url=self.crispr_guide_rnas_url, headers=self.headers, json=payload)
        submission = json.loads(reply['content'])

        # Without a task id there is nothing to poll for, so hand back the raw submission response.
        if not wait_for_results or 'taskId' not in submission:
            return submission

        finished = wait_for_status(
            method=self._design_crispr_grnas_get_result,
            validate=lambda x: x['status'] == 'completed-successfully',
            task_id=submission['taskId'],
        )
        return finished['data']
Exemplo n.º 11
0
    def submit_multi_objective_optimization(
        self,
        data_input: List[Any],
        data_schema: List[Any],
        pretrainedModelIds: Optional[List[Union[int, str]]] = None,  # noqa: N803
        configs: Optional[Any] = None,
    ):
        """Submits a multi objective optimization task.

        Args:
            data_input (List[Any]): This is required and must contain a JSON array of JSON objects with the
                input training data. These objects must be consistent with the `data_schema` property.

        ```json
                [{
                    "Descriptor1": "A0",
                    "Descriptor2": "B1",
                    "Target_1": "1",
                    "Target_2": "-1"
                }, {
                    "Descriptor1": "A0",
                    "Descriptor2": "B2",
                    "Target_1": "2",
                    "Target_2": "-2"
                }, {
                    "Descriptor1": "A0",
                    "Descriptor2": "B3",
                    "Target_1": "3",
                    "Target_2": "-3"
                }]
        ```

            data_schema (List[Any]): This is an array of the schema of the input data columns. The `name`
                property corresponds to the column's name. The `type` property determines whether the column
                is a "target" or a "descriptor" (feature). Only "target" and "descriptor" are supported. The
                `value_type` type determines the type of the column's values. Only "numeric" and "categoric"
                are supported.

        ```json
                [{
                    "name": "Descriptor1",
                    "value_type": "categoric",
                    "type": "descriptor"
                }, {
                    "name": "Descriptor2",
                    "value_type": "categoric",
                    "type": "descriptor"
                }, {
                    "name": "Target_1",
                    "value_type": "numeric",
                    "type": "target"
                }, {
                    "name": "Target_2",
                    "value_type": "numeric",
                    "type": "target"
                }]
        ```
                - `name` : corresponds to the name of the column (descriptor or target)
                - `type` : describes whether the field is a descriptor (feature) or a target.
                - `value_type` : defines the type of value of this column. Available types are "numeric" or
                    "categoric"

            pretrainedModelIds (Optional[List[Union[int, str]]]): Sent as the `predictiveModelIds` field of the
                request body; `None` (the default) is forwarded unchanged. Presumably these are ids of
                previously trained predictive models - TODO confirm against the API documentation.

            configs (Optional[Any]): This is an advanced property containing advanced configuration for the
                training execution. Please refer to Teselagen's Data Science Team. When None, an empty dict is
                sent.

        Returns:
            (dict): The JSON-decoded response content, a dictionary containing info of the submitted job. An
                example is shown below:

        ```json
            {
                "authToken": "1d140371-a59f-4ad2-b57c-6fc8e0a20ff8",
                "checkInInterval": null,
                "controlToken": null,
                "id": "36",
                "input": {
                    "job": "modeling-tool",
                    "kwargs": {}
                },
                "lastCheckIn": null,
                "missedCheckInCount": null,
                "result": null,
                "resultStatus": null,
                "service": "ds-tools",
                "serviceUrl": null,
                "startedOn": null,
                "status": "created",
                "taskId": null,
                "trackingId": null,
                "completedOn": null,
                "createdAt": "2020-10-29T13:18:06.167Z",
                "updatedAt": "2020-10-29T13:18:06.271Z",
                "cid": null,
                "__typename": "microserviceQueue"
            }
        ```
        """
        body = {
            'dataInput': data_input,
            'dataSchema': data_schema,
            'predictiveModelIds': pretrainedModelIds,
            'configs': {} if configs is None else configs,
        }
        response = post(
            url=self.submit_multi_objective_optimization_url,
            headers=self.headers,
            json=body,
        )
        return json.loads(response['content'])
Exemplo n.º 12
0
    def submit_model(
        self,
        data_input: List[Any],
        data_schema: List[Any],
        model_type: ModelType,
        configs: Optional[Any] = None,
        name: str = '',
        description: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Submit a model for training.

        Args:
            data_input (List[Any]): Required. A JSON array of JSON objects holding the input training data.
                The objects must be consistent with `data_schema`.

        ```json
                [{
                    "Descriptor1": "A0",
                    "Descriptor2": "B1",
                    "Target": "1"
                }, {
                    "Descriptor1": "A0",
                    "Descriptor2": "B2",
                    "Target": "2"
                }, {
                    "Descriptor1": "A0",
                    "Descriptor2": "B3",
                    "Target": "3"
                }]
        ```

            data_schema (List[Dict[str, Any]]): Array describing the schema of the input data columns.

        ```json
                [{
                    "id": "1",
                    "name": "Descriptor1",
                    "value_type": "categoric",
                    "type": "descriptor"
                }, {
                    "id": "2",
                    "name": "Descriptor2",
                    "value_type": "categoric",
                    "type": "descriptor"
                }, {
                    "id": "3",
                    "name": "Target",
                    "value_type": "numeric",
                    "type": "target"
                }]
        ```
                - `id` : the id (position) of the column in the dataset.
                - `name` : the name of the column (descriptor or target).
                - `type` : whether the column is a "descriptor" (feature) or a "target". Only these two values
                    are supported.
                - `value_type` : the type of the column's values. Only "numeric" and "categoric" are
                    supported.

            model_type (ModelType): The type of model being submitted. Either "predictive", "evolutive" or
                "generative".

                NOTE: A "generative" model has no "descriptor" column; there should only be one "target"
                      column with the amino acid sequence. This needs to be properly set in the dataSchema
                      field according to the documentation.

            configs (Optional[Any]): Advanced configuration for the training execution. Please refer to
                Teselagen's Data Science Team. When None, an empty dict is sent.

            name (str): The Evolve Model's name. Defaults to an empty string.

            description (Optional[str]): A description for the Evolve Model. When None, an empty string is
                sent.

        Returns:
            (dict): Whatever `self._get_data_from_content` extracts from the JSON-decoded response. According
                to the original documentation this is a dictionary with info of the submitted job, for
                example:

        ```json
            {
                "authToken": "1d140371-a59f-4ad2-b57c-6fc8e0a20ff8",
                "checkInInterval": null,
                "controlToken": null,
                "id": "36",
                "input": {
                    "job": "modeling-tool",
                    "kwargs": {}
                },
                "lastCheckIn": null,
                "missedCheckInCount": null,
                "result": null,
                "resultStatus": null,
                "service": "ds-tools",
                "serviceUrl": null,
                "startedOn": null,
                "status": "created",
                "taskId": null,
                "trackingId": null,
                "completedOn": null,
                "createdAt": "2020-10-29T13:18:06.167Z",
                "updatedAt": "2020-10-29T13:18:06.271Z",
                "cid": null,
                "__typename": "microserviceQueue"
            }
        ```
        """
        # Optional fields are normalized so the API always receives concrete values.
        if configs is None:
            configs = {}
        if description is None:
            description = ''

        payload = {
            'dataInput': data_input,
            'dataSchema': data_schema,
            'modelType': model_type,
            'configs': configs,
            'name': name,
            'description': description,
        }
        reply = post(url=self.submit_model_url, headers=self.headers, json=payload)

        decoded = json.loads(reply['content'])
        return self._get_data_from_content(decoded)
Exemplo n.º 13
0
    def get_model_datapoints(
        self,
        model_id: ModelID,
        datapoint_type: str,
        batch_size: int,
        batch_number: int,
    ) -> Dict[str, Any]:
        """Fetch a batch of datapoints belonging to a model.

        The server is queried for the datapoints matching the given model id and datapoint type. Each element
        of the response's `data` array carries a `datapoint` object; only that object is kept, after dropping
        its `set_tag` key and every key containing 'PCA'.

        Args:
            model_id (ModelID): Id of the model (sent as a string).

            datapoint_type (str): Either "input" or "output". "input" fetches only the input datapoints (a.k.a.
                training datapoints); "output" fetches only the output datapoints (a.k.a. predicted datapoints
                not seen in the training dataset).

            batch_size (int): Number of datapoints to fetch from the database table.

            batch_number (int): Depends on `batch_size`; determines the index position offset, in units of
                `batch_size`, from where to start fetching datapoints.

        Returns:
            Dict[str, Any]: The JSON-decoded response content with its `data` key replaced by the list of
                cleaned datapoints (an empty list when the response has no `data` key).

        ```json
            {
                "message": "Submission success.",
                "data": [{ ... }, { ... }, { ... }]
            }
        ```
        """
        query = {
            'modelId': str(model_id),
            'datapointType': datapoint_type,
            'batchSize': batch_size,
            'batchNumber': batch_number,
        }
        reply = post(url=self.get_model_datapoints_url, headers=self.headers, json=query)

        content: Dict[str, Any] = json.loads(reply['content'])

        cleaned: List[Dict[str, Any]] = []
        if 'data' in content:
            for element in content['data']:
                kept: Dict[str, Any] = {}
                for key, value in element['datapoint'].items():
                    # Skip the set tag and any PCA-related entries.
                    if key == 'set_tag' or 'PCA' in key:
                        continue
                    kept[key] = value
                cleaned.append(kept)

        content.update(data=cleaned)
        return content
Exemplo n.º 14
0
    def get_models_by_type(
        self,
        model_type: Optional[ModelType] = None,
    ):
        """Retrieve the metadata of multiple models, filtered by the provided `model_type`.

        Args:
            model_type (ModelType): Type of the models to retrieve. It must be one of the values in
                `ALLOWED_MODEL_TYPES`. The documented options are:

        ```
            "predictive"
            "evolutive"
            "generative"
             None
        ```

        Returns:
            The value extracted by `_get_data_from_content` from the decoded response body. An example
            response body is shown below:

        ```json
        {
            "message": "Submission success.",
            "data": [{
                "id": "1",
                "labId": "1",
                "modelType": "evolutive",
                "name": "My First Evolutive Model",
                "description": "This is an example model",
                "status": "completed-successfully",
                "evolveModelInfo": {
                    "microserviceQueueId": "1",
                    "dataSchema": [{
                        "id": "1",
                        "name": "Descriptor1",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "1",
                        "name": "Descriptor2",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "2",
                        "name": "Target",
                        "value_type": "numeric",
                        "type": "target"
                    }],
                    "modelStats": {
                        "MAE": 45
                    }
                }
            }, {
                "id": "2",
                "labId": "1",
                "modelType": "evolutive",
                "name": "My Second Evolutive Model",
                "description": "This is an example model",
                "status": "completed-successfully",
                "evolveModelInfo": {
                    "microserviceQueueId": "1",
                    "dataSchema": [{
                        "id": "1",
                        "name": "Descriptor1",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "1",
                        "name": "Descriptor2",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "2",
                        "name": "Target",
                        "value_type": "numeric",
                        "type": "target"
                    }],
                    "modelStats": {
                        "MAE": 40
                    }
                }
            }]
        }

        ```

        Raises:
            ValueError: If `model_type` is not in `ALLOWED_MODEL_TYPES`.
        """
        is_allowed = model_type in ALLOWED_MODEL_TYPES
        if not is_allowed:
            error_message = f'Type: {model_type} not in {ALLOWED_MODEL_TYPES}'
            raise ValueError(error_message)

        # The model type is forwarded exactly as received; None is not converted to the string 'null'.
        payload = {'modelType': model_type}
        reply = post(
            url=self.get_models_by_type_url,
            headers=self.headers,
            json=payload,
        )

        # Store the decoded body back on the response before extracting the data from it.
        decoded = json.loads(reply['content'])
        reply['content'] = decoded
        return self._get_data_from_content(decoded)