Exemplo n.º 1
0
    def token_series(cls, token, corpus=None, pos=None):
        """Get a bin -> count series for a token.

        Sums ``count`` over every row whose token matches, grouped by
        ``bin``, optionally narrowed to a single corpus and/or POS tag.

        Args:
            token (str): Token to match exactly.
            corpus (str): If truthy, only rows from this corpus are summed.
            pos (str): If truthy, only rows with this POS tag are summed.

        Returns:
            numpy.ndarray: Array of length 100 (numpy's default float
            dtype). Position ``i`` holds the summed count for bin ``i``;
            bins with no matching rows stay at 0.
        """
        # Collect every WHERE condition up front so the query is built once.
        conditions = [cls.token == token]

        if corpus:
            conditions.append(cls.corpus == corpus)

        if pos:
            conditions.append(cls.pos == pos)

        query = (
            session.query(cls.bin, func.sum(cls.count))
            .filter(*conditions)
            .group_by(cls.bin)
            .order_by(cls.bin)
        )

        # One slot per bin. Assumes bin values are integers in 0..99 --
        # TODO confirm against the model's bin column.
        series = np.zeros(100)

        for offset, count in query:
            series[offset] = count

        return series
Exemplo n.º 2
0
    def pos_tags(cls):
        """Get all distinct POS tags in sorted order.

        Returns:
            list: The distinct values of the ``pos`` column, sorted
            ascending.
        """
        query = session.query(distinct(cls.pos))

        # Each result row has a single column; unwrap it before sorting.
        return sorted(row[0] for row in query.all())
Exemplo n.º 3
0
    def token_counts(cls, min_count=0):
        """Sum the counts for each token over all of its rows.

        Args:
            min_count (int): Exclusive threshold. Only tokens whose summed
                count is strictly greater than this value are included.

        Returns:
            OrderedDict: token -> summed count, ordered from the largest
            total to the smallest.
        """
        total = func.sum(cls.count)

        rows = (
            session.query(cls.token, total)
            .group_by(cls.token)
            .having(total > min_count)
            .order_by(total.desc())
            .all()
        )

        return OrderedDict(rows)
Exemplo n.º 4
0
    def token_pos_counts(cls, min_count=0):
        """Sum the counts for each (token, POS tag) pair over all rows.

        Args:
            min_count (int): Exclusive threshold. Only pairs whose summed
                count is strictly greater than this value are included.

        Returns:
            OrderedDict: (token, pos) -> summed count, ordered from the
            largest total to the smallest.
        """
        total = func.sum(cls.count)

        query = (
            session.query(cls.token, cls.pos, total)
            .group_by(cls.token, cls.pos)
            .having(total > min_count)
            .order_by(total.desc())
        )

        counts = OrderedDict()

        # Rows arrive already sorted, so insertion order preserves the ranking.
        for token, pos, count in query.all():
            counts[(token, pos)] = count

        return counts
Exemplo n.º 5
0
    def pos_series(cls, tag, corpus=None):
        """Get a bin -> count series for a POS tag.

        Sums ``count`` over every row whose POS tag matches, grouped by
        ``bin``, optionally narrowed to a single corpus.

        Args:
            tag (str): POS tag to match exactly.
            corpus (str): If truthy, only rows from this corpus are summed.

        Returns:
            numpy.ndarray: Array of length 100 (numpy's default float
            dtype). Position ``i`` holds the summed count for bin ``i``;
            bins with no matching rows stay at 0.
        """
        # Collect every WHERE condition up front so the query is built once.
        conditions = [cls.pos == tag]

        if corpus:
            conditions.append(cls.corpus == corpus)

        query = (
            session.query(cls.bin, func.sum(cls.count))
            .filter(*conditions)
            .group_by(cls.bin)
            .order_by(cls.bin)
        )

        # One slot per bin. Assumes bin values are integers in 0..99 --
        # TODO confirm against the model's bin column.
        series = np.zeros(100)

        for offset, count in query:
            series[offset] = count

        return series