Example No. 1
 def reset(self):
     """
     Reset the language model on the server side and clear the cached priors.
     """
     lm_server.post_json_request(self.server_config, 'reset')
     self.priors = defaultdict(list)
     log.info("\ncleaning history\n")
Example No. 2
 def init(self, nbest: int = 1):
     """
     Initialize the language model (on the server side)
     Input:
         nbest - top N symbols from evidence
     """
     lm_server.post_json_request(self.server_config,
                                 'init',
                                 data={'nbest': nbest})
Example No. 3
 def init(self, nbest: int = 1):
     """
     Initialize the language model (on the server side)
     Input:
         nbest - top N symbols from evidence
     """
     if not isinstance(nbest, int):
         raise NBestError(nbest)
     if nbest > 4:
         raise NBestHighValue(nbest)
     lm_server.post_json_request(self.server_config,
                                 'init',
                                 data={'nbest': nbest})
Example No. 4
    def recent_priors(self, return_mode='letter'):
        """
        Return the priors given the most recent decision. The server is
        queried only when no priors are cached for the given return_mode.
        """

        if not self.priors[return_mode]:
            output = lm_server.post_json_request(self.server_config,
                                                 'recent_priors',
                                                 {'return_mode': return_mode})
            return self.__return_priors(output, return_mode)
        else:
            return self.priors
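
recent_priors only queries the server when nothing is cached for the requested return_mode. A minimal sketch of that caching pattern, with the lm_server.post_json_request round trip stubbed out and the prior values made up for illustration:

from collections import defaultdict

class PriorsCache:
    def __init__(self):
        self.priors = defaultdict(list)

    def recent_priors(self, return_mode='letter'):
        if not self.priors[return_mode]:
            # stand-in for the lm_server.post_json_request(...) round trip
            self.priors[return_mode] = [('a', 1.2), ('b', 2.3)]
        return self.priors

cache = PriorsCache()
print(cache.recent_priors())  # first call hits the (stubbed) server
print(cache.recent_priors())  # second call reuses the cached priors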
Example No. 5
    def state_update(self, evidence: List, return_mode: str = 'letter'):
        """
        Provide a prior distribution from the language model
        in response to the system's decision regarding the
        last observation.
        Both lm types accept more than one timestep of input.
        Pay attention to the data structure expected.
        OCLM
        Input:
            evidence - a list of (lists of) tuples [[(sym1, prob1), (sym2, prob2)]];
            the numbers are assumed to be in the log probability domain
            return_mode - 'letter' or 'word' (available
                          for oclm) strings
        Output:
            priors - a json dictionary with normalized priors
                     in the negative log probability domain.
        """

        # assert the input contains a valid symbol
        try:
            clean_evidence = []
            for tmp_evidence in evidence:
                tmp = []
                for (symbol, pr) in tmp_evidence:
                    assert symbol in ALPHABET, \
                        "%r contains invalid symbol" % evidence
                    if symbol == SPACE_CHAR:
                        tmp.append((LM_SPACE, pr))
                    else:
                        tmp.append((symbol.lower(), pr))
                clean_evidence.append(tmp)
        except Exception as error:
            # a malformed evidence structure surfaces here as an
            # AssertionError, TypeError or ValueError
            raise EvidenceDataStructError from error

        output = lm_server.post_json_request(self.server_config,
                                             'state_update', {
                                                 'evidence': clean_evidence,
                                                 'return_mode': return_mode
                                             })
        return self.__return_priors(output, return_mode)
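
A minimal sketch of the evidence structure this OCLM variant expects: one inner list per timestep, each holding (symbol, log probability) tuples. The symbols and probabilities are made up for illustration, and the call in the trailing comment assumes an already constructed wrapper instance named lm:

import math

evidence = [
    # timestep 1: the system's belief over the first symbol
    [('A', math.log(0.6)), ('B', math.log(0.3)), ('C', math.log(0.1))],
    # timestep 2: the system's belief over the second symbol
    [('T', math.log(0.7)), ('E', math.log(0.3))],
]

# lm.state_update(evidence, return_mode='letter') would lower-case each
# symbol, map the space character to the LM's space token, and return
# normalized priors in the negative log probability domain.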
Example No. 6
    def state_update(self, evidence: List, return_mode: str = 'letter'):
        """
        Provide a prior distribution from the language model
        in response to the system's decision regarding the
        last observation.
        Both lm types accept more than one timestep of input.
        Pay attention to the data structure expected.

        Input:
            decision - a symbol or a string of symbols encapsulated in a
            list
            the numbers are assumed to be in the log probability domain
            return_mode - 'letter' or 'word' (available
                          for oclm) strings
        Output:
            priors - a json dictionary with normalized priors
                     in the negative log probability domain.
        """
        assert return_mode == 'letter', "PRELM only allows letter output"
        # assert the input contains a valid symbol
        decision = evidence  # in prelm we treat the evidence as a decision
        for symbol in decision:
            assert symbol in ALPHABET or symbol == ' ', \
                "%r contains invalid symbol" % decision
        clean_evidence = []
        for symbol in decision:
            if symbol == SPACE_CHAR:
                symbol = LM_SPACE
            clean_evidence.append(symbol.lower())

        output = lm_server.post_json_request(self.server_config,
                                             'state_update', {
                                                 'evidence': clean_evidence,
                                                 'return_mode': return_mode
                                             })

        return self.__return_priors(output, return_mode)
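
In contrast to the OCLM variant, this PRELM variant treats the evidence as a flat list of already decided symbols. A minimal sketch of the cleaning step it applies before posting the request; the values of SPACE_CHAR and LM_SPACE below are assumptions for illustration, not the codebase's actual constants:

SPACE_CHAR = ' '   # assumed space symbol
LM_SPACE = '#'     # assumed space token used by the language model server

decision = ['T', 'H', 'E', SPACE_CHAR]
clean_evidence = [LM_SPACE if symbol == SPACE_CHAR else symbol.lower()
                  for symbol in decision]
print(clean_evidence)  # ['t', 'h', 'e', '#']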