def idlangs(self, tokens):
        """
        Identify languages from wordlist hits over a token sequence.

        @param tokens sequence of tokens to process
        @returns tuple of (lid, langspresent, hits, cs) where lid is the
        result of self._pick_lang over the hits, langspresent is a list of
        booleans for language presence, hits is a list of
        (short wordlist count, long wordlist count) tuples, and cs is the
        result of cs_langspresent over langspresent

        The ordering of langspresent and hits will match the order of
        languages given at initialization.
        """
        # Create scores. The builtin int is used as the dtype because the
        # numpy.int alias was deprecated and later removed from NumPy.
        short_scores = numpy.zeros(self.nlangs, dtype=int)
        long_scores = numpy.zeros(self.nlangs, dtype=int)

        # Count each language
        for token in tokens:
            for idx in range(self.nlangs):
                if token in self.shorts[idx]:
                    short_scores[idx] += 1
                if token in self.longs[idx]:
                    long_scores[idx] += 1

        # Decide whether each language is there: either more than one short
        # wordlist hit, or exactly one short hit backed by a long wordlist hit
        langspresent = [(short_scores[idx] > 1 or
                         (short_scores[idx] == 1 and long_scores[idx] > 0))
                        for idx in range(self.nlangs)]

        # Codeswitching verdict
        cs = cs_langspresent(langspresent)

        # Give the number of hits in the wordlists. This is materialized as
        # a list because zip() can be a one-shot iterator, which
        # _pick_lang would exhaust before the hits are returned to the caller.
        hits = list(zip(short_scores, long_scores))
        lid = self._pick_lang(hits)
        return (lid, langspresent, hits, cs)
    def idlangs(self, tokens):
        """
        Identify the language of a token sequence from per-token ratios.

        @param tokens: tokens to identify
        @returns tuple of (lid, langspresent, hits, ratios, langs, unk_rate, cs)

        The hit counts follow the order of self.langs, whose final entry is
        counted separately as the unknown-language total.
        """
        # First pass: one ratio per token, with a fallback value for tokens
        # that are missing from the ratio table
        ratios = []
        for token in tokens:
            ratios.append(self._ratios.get(token, RatioListLID.UNK_WORD_RATIO))

        # Second pass: turn each ratio into a language; tokens flagged by
        # non_lid are recorded as None
        langs = []
        for ratio, token in zip(ratios, tokens):
            if non_lid(token):
                langs.append(None)
            else:
                langs.append(self._ratio_lang(ratio))

        # Tally tokens per language, then split off the trailing unknown count
        hits = [langs.count(label) for label in self.langs]
        known_lang_hits = hits[:-1]
        unknown_hits = hits[-1]

        # Share of counted tokens that were unknown; 1.0 when nothing counted
        hitcount = sum(hits)
        if hitcount:
            unk_rate = unknown_hits / hitcount
        else:
            unk_rate = 1.0

        # A language is present once it reaches its minimum hit count
        langspresent = []
        for count, minimum in zip(known_lang_hits, self.present_mins):
            langspresent.append(count >= minimum)

        # A present language is withdrawn if the unknown rate exceeds its limit
        if langspresent[0]:
            langspresent[0] = unk_rate <= self.lang1_max_unk_rate
        if langspresent[1]:
            langspresent[1] = unk_rate <= self.lang2_max_unk_rate

        # Codeswitching is only considered below the acceptable unknown rate
        if unk_rate <= self.cs_max_unk_rate:
            cs = cs_langspresent(langspresent)
        else:
            cs = False

        # Only languages that passed the thresholds keep their hits when
        # picking the overall LID
        surviving_hits = []
        for count, present in zip(known_lang_hits, langspresent):
            surviving_hits.append(count if present else 0)
        lid = self._pick_lang(surviving_hits)

        return (lid, langspresent, hits, ratios, langs, unk_rate, cs)
    def idlangs(self, tokens, lowmethod, unkmethod, tags=None):
        """
        Identify the language of a token sequence from per-token ratios,
        refining each token's language with choose_lang.

        @param tokens: tokens to identify
        @param lowmethod: passed through to choose_lang
        @param unkmethod: passed through to choose_lang and choose_unk_lang
        @param tags: optional Jerboa tags for the tokens; when not given,
        JERBOA_NOTAG is used for every token
        @returns tuple of (lid, langspresent, hits, ratios, langs, unk_rate, cs)

        The hit counts follow the order of self.langs, whose final entry is
        counted separately as the unknown-language total.
        """
        # First pass: one ratio per token, with a fallback value for tokens
        # that are missing from the ratio table
        ratios = []
        for token in tokens:
            ratios.append(self._ratios.get(token, RatioListLID.UNK_WORD_RATIO))

        # Second pass: initial language per token; tokens flagged by non_lid
        # are recorded as None
        initial_langs = []
        for ratio, token in zip(ratios, tokens):
            if non_lid(token):
                initial_langs.append(None)
            else:
                initial_langs.append(self._ratio_lang(ratio))

        # Put in dummy tags if needed
        if not tags:
            tags = [JERBOA_NOTAG] * len(tokens)

        # Choose the final language for each token from its tag and ratio
        langs = []
        for token, tag, lang, ratio in zip(tokens, tags, initial_langs, ratios):
            langs.append(choose_lang(token, lang, self.langs, tag, ratio,
                                     lowmethod, unkmethod, False))

        # Clean out any remaining unknowns
        if None in langs:
            langs = choose_unk_lang(langs, unkmethod)

        # Tally tokens per language, then split off the trailing unknown count
        hits = [langs.count(label) for label in self.langs]
        known_lang_hits = hits[:-1]
        unknown_hits = hits[-1]

        # Share of counted tokens that were unknown; 1.0 when nothing counted
        hitcount = sum(hits)
        if hitcount:
            unk_rate = unknown_hits / hitcount
        else:
            unk_rate = 1.0

        # A language is present once it reaches its minimum hit count
        langspresent = []
        for count, minimum in zip(known_lang_hits, self.present_mins):
            langspresent.append(count >= minimum)

        # A present language is withdrawn if the unknown rate exceeds its limit
        if langspresent[0]:
            langspresent[0] = unk_rate <= self.lang1_max_unk_rate
        if langspresent[1]:
            langspresent[1] = unk_rate <= self.lang2_max_unk_rate

        # Codeswitching is only considered below the acceptable unknown rate
        if unk_rate <= self.cs_max_unk_rate:
            cs = cs_langspresent(langspresent)
        else:
            cs = False

        # Only languages that passed the thresholds keep their hits when
        # picking the overall LID
        surviving_hits = []
        for count, present in zip(known_lang_hits, langspresent):
            surviving_hits.append(count if present else 0)
        lid = self._pick_lang(surviving_hits)

        return (lid, langspresent, hits, ratios, langs, unk_rate, cs)