Exemplo n.º 1
0
    def collocs(self, cattr='-', csortfn='m', cbgrfns='mt', cfromw=-5, ctow=5, cminfreq=5, cminbgr=3, max_lines=0):
        """
        Walk through manatee.CollocItems created for this object and turn
        the collocation candidates into a table-like structure.

        arguments:
        cattr -- attribute name; passed to CollocItems and used in the
                 generated filter queries
        csortfn -- passed to CollocItems (presumably the sorting statistic
                   code -- TODO confirm against the manatee API)
        cbgrfns -- an iterable of one-character statistic codes; one value
                   (colls.get_bgr) and one header column is produced per code
        cfromw, ctow -- passed to CollocItems and inserted into the
                        generated filter queries
        cminfreq, cminbgr -- passed to CollocItems
        max_lines -- passed to CollocItems; a positive value also stops the
                     local iteration (see the note below)

        returns:
        a dict with keys 'Head' (a list of column descriptions) and 'Items'
        (a list of dicts with keys 'str', 'freq', 'Stats', 'pfilter', 'nfilter')

        NOTE(review): the local guard breaks only once the counter is greater
        than max_lines, i.e. it lets up to max_lines + 1 items through --
        confirm whether callers rely on the extra item.
        """
        statdesc = {'t': translate('T-score'),
                    'm': translate('MI'),
                    '3': translate('MI3'),
                    'l': translate('log likelihood'),
                    's': translate('min. sensitivity'),
                    'p': translate('MI.log_f'),
                    'r': translate('relative freq. [%]'),
                    'f': translate('absolute freq.'),
                    'd': translate('logDice')
                    }
        colls = manatee.CollocItems(self, cattr, csortfn, cminfreq, cminbgr,
                                    cfromw, ctow, max_lines)
        # the doubled percent signs survive this first formatting pass and
        # are filled in per item (filter type prefix + escaped collocate)
        qfilter = '%%s%i %i 1 [%s="%%s"]' % (cfromw, ctow, cattr)
        filter_kinds = (('pfilter', 'P'), ('nfilter', 'N'))

        items = []
        num_seen = 0
        while not colls.eos():
            if max_lines > 0 and num_seen > max_lines:
                break
            row = {
                'str': colls.get_item(),
                'freq': colls.get_cnt(),
                'Stats': [{'s': '%.3f' % colls.get_bgr(fn_code)} for fn_code in cbgrfns],
            }
            for key, prefix in filter_kinds:
                row[key] = qfilter % (prefix, escape(self.import_string(colls.get_item())))
            items.append(row)
            colls.next()
            num_seen += 1

        # unknown statistic codes are shown as they are
        head = [{'n': ''}, {'n': 'Freq', 's': 'f'}]
        head.extend({'n': statdesc.get(fn_code, fn_code), 's': fn_code} for fn_code in cbgrfns)
        return {'Head': head, 'Items': items}
Exemplo n.º 2
0
    def get_query(self):
        """
        Transform the 'sca_*' attributes of the source object into
        per-structure conditions.

        An attribute name has the form 'sca_[structure].[attribute]'.
        A string value containing '|' is split into alternatives which are
        joined by ' | ' within parentheses; conditions belonging to the same
        structure are joined by ' & '.

        returns:
        a list of tuples (struct, condition); strings are encoded to the encoding current
        corpus uses!
        """
        prefix = 'sca_'
        struct_attrs = [(name[len(prefix):], self._access_fn(self._src_obj, name))
                        for name in self._attr_producer_fn(self._src_obj)
                        if name.startswith(prefix)]

        conditions = {}
        for struct_attr, value in struct_attrs:
            if type(value) in (str, unicode) and '|' in value:
                value = value.split('|')
            struct_name, attr_name = struct_attr.split('.')

            if type(value) is list:
                alternatives = ['%s="%s"' % (attr_name, l10n.escape(item)) for item in value]
                if not alternatives:
                    # an empty list produces no condition at all
                    continue
                query = '(%s)' % ' | '.join(alternatives)
            else:
                query = '%s="%s"' % (attr_name, l10n.escape(value))

            # TODO: is the following encoding change always OK?
            query = l10n.export_string(query, to_encoding=self._corp.get_conf('ENCODING'))
            conditions.setdefault(struct_name, []).append(query)

        return [(struct_name, ' & '.join(parts)) for struct_name, parts in conditions.items()]
Exemplo n.º 3
0
    def get_query(self):
        """
        Collect the values of all the 'sca_*' attributes of the source
        object and express them as conditions grouped by structure name.

        returns:
        a list of tuples (struct, condition); strings are encoded to the encoding current
        corpus uses!
        """
        def make_condition(attr, value):
            # A list value becomes a parenthesized disjunction (None if the
            # list is empty), anything else a plain equality test.
            if type(value) is not list:
                return '%s="%s"' % (attr, l10n.escape(value))
            variants = []
            for variant in value:
                variants.append('%s="%s"' % (attr, l10n.escape(variant)))
            if len(variants) == 0:
                return None
            return '(%s)' % ' | '.join(variants)

        scas = []
        for attr_name in self._attr_producer_fn(self._src_obj):
            if attr_name.startswith('sca_'):
                scas.append((attr_name[4:], self._access_fn(self._src_obj, attr_name)))

        by_struct = {}
        for key, raw_value in scas:
            # a '|' separated string represents multiple alternative values
            is_text = type(raw_value) in (str, unicode)
            if is_text and '|' in raw_value:
                raw_value = raw_value.split('|')
            struct, attr = key.split('.')
            query = make_condition(attr, raw_value)
            if query is None:
                continue
            # TODO: is the following encoding change always OK?
            encoded = l10n.export_string(
                query, to_encoding=self._corp.get_conf('ENCODING'))
            if struct not in by_struct:
                by_struct[struct] = []
            by_struct[struct].append(encoded)

        result = []
        for struct, subqueries in by_struct.items():
            result.append((struct, ' & '.join(subqueries)))
        return result
Exemplo n.º 4
0
    def collocs(self,
                cattr='-',
                csortfn='m',
                cbgrfns='mt',
                cfromw=-5,
                ctow=5,
                cminfreq=5,
                cminbgr=3,
                from_idx=0,
                max_lines=50):
        """
        Return one page of collocation candidates along with paging info.

        All the items provided by manatee.CollocItems are iterated (to be
        able to report the total count) but only the ones with index in
        the interval [from_idx, from_idx + max_lines) are materialized.

        arguments:
        cattr -- attribute name; passed to CollocItems and used in the
                 generated filter queries
        csortfn -- passed to CollocItems (presumably the sorting statistic
                   code -- TODO confirm against the manatee API)
        cbgrfns -- an iterable of one-character statistic codes; one value
                   (colls.get_bgr) and one header column is produced per code
        cfromw, ctow -- passed to CollocItems and inserted into the
                        generated filter queries
        cminfreq, cminbgr -- passed to CollocItems
        from_idx -- zero-based index of the first returned item
        max_lines -- page size; a non-positive value produces no items and
                     a zero page count (the original code raised
                     ZeroDivisionError for max_lines == 0)

        returns:
        a dict with keys 'Head' (column descriptions), 'Items' (the page of
        items passed through self.add_block_items), 'Total' (number of all
        the iterated items) and 'TotalPages'
        """
        statdesc = {
            't': 'T-score',
            'm': 'MI',
            '3': 'MI3',
            'l': 'log likelihood',
            's': 'min. sensitivity',
            'p': 'MI.log_f',
            'r': 'relative freq. [%]',
            'f': 'absolute freq.',
            'd': 'logDice',
        }

        # NOTE(review): 2**29 looks like an "effectively unlimited" item
        # cap because paging is performed below -- confirm against manatee
        colls = manatee.CollocItems(self, cattr, csortfn, cminfreq, cminbgr,
                                    cfromw, ctow, 2**29)
        # the doubled percent signs survive this first formatting pass and
        # are filled in per item (filter type prefix + escaped collocate)
        qfilter = '%%s%i %i 1 [%s="%%s"]' % (cfromw, ctow, cattr)
        to_idx = from_idx + max_lines

        items = []
        i = 0
        while not colls.eos():
            if from_idx <= i < to_idx:
                coll_str = colls.get_item()
                # escaped once and shared by both filter queries
                escaped = escape(self.import_string(coll_str))
                items.append({
                    'str': coll_str,
                    'freq': colls.get_cnt(),
                    'Stats': [{'s': '%.3f' % colls.get_bgr(s)}
                              for s in cbgrfns],
                    'pfilter': qfilter % ('P', escaped),
                    'nfilter': qfilter % ('N', escaped),
                })
            colls.next()
            i += 1

        # unknown statistic codes are shown as they are
        head = [{'n': ''}, {'n': 'Freq', 's': 'f'}] \
            + [{'n': statdesc.get(s, s), 's': s} for s in cbgrfns]
        if max_lines > 0:
            total_pages = int(math.ceil(i / float(max_lines)))
        else:
            # nothing can be paged without a positive page size
            total_pages = 0
        return {
            'Head': head,
            'Items': self.add_block_items(items),
            'Total': i,
            'TotalPages': total_pages
        }
Exemplo n.º 5
0
    def collocs(self, cattr='-', csortfn='m', cbgrfns='mt',
                cfromw=-5, ctow=5, cminfreq=5, cminbgr=3, from_idx=0, max_lines=50):
        """
        Fetch collocation candidates and return the requested page of them.

        The whole manatee.CollocItems sequence is walked through so the
        total number of items is known; an item is included in the result
        only if from_idx <= [item index] < from_idx + max_lines.

        arguments:
        cattr -- attribute name; passed to CollocItems and used in the
                 generated filter queries
        csortfn -- passed to CollocItems (presumably the sorting statistic
                   code -- TODO confirm against the manatee API)
        cbgrfns -- an iterable of one-character statistic codes; each code
                   produces one colls.get_bgr value per item and one
                   header column
        cfromw, ctow -- passed to CollocItems and inserted into the
                        generated filter queries
        cminfreq, cminbgr -- passed to CollocItems
        from_idx -- zero-based index of the first item of the page
        max_lines -- number of items per page; if it is not positive then
                     no items are returned and 'TotalPages' is 0 (formerly
                     max_lines == 0 ended with ZeroDivisionError)

        returns:
        a dict with keys 'Head', 'Items' (processed by
        self.add_block_items), 'Total' (count of all the iterated items)
        and 'TotalPages'
        """
        statdesc = {'t': 'T-score',
                    'm': 'MI',
                    '3': 'MI3',
                    'l': 'log likelihood',
                    's': 'min. sensitivity',
                    'p': 'MI.log_f',
                    'r': 'relative freq. [%]',
                    'f': 'absolute freq.',
                    'd': 'logDice',
                    }

        items = []
        # NOTE(review): 2 ** 29 appears to serve as a "no limit" value as
        # the page is cut out below -- confirm against the manatee API
        colls = manatee.CollocItems(self, cattr, csortfn, cminfreq, cminbgr,
                                    cfromw, ctow, 2 ** 29)
        # '%%s' placeholders stay in the template for the per-item pass
        qfilter = '%%s%i %i 1 [%s="%%s"]' % (cfromw, ctow, cattr)
        page_end = from_idx + max_lines
        total = 0
        while not colls.eos():
            if from_idx <= total < page_end:
                collocate = colls.get_item()
                # both filters share the same escaped value
                query_value = escape(self.import_string(collocate))
                stats = [{'s': '%.3f' % colls.get_bgr(s)} for s in cbgrfns]
                items.append(
                    {'str': collocate, 'freq': colls.get_cnt(),
                     'Stats': stats,
                     'pfilter': qfilter % ('P', query_value),
                     'nfilter': qfilter % ('N', query_value)
                     })
            colls.next()
            total += 1

        # a code missing in statdesc is used as its own label
        head = [{'n': ''}, {'n': 'Freq', 's': 'f'}] \
            + [{'n': statdesc.get(s, s), 's': s} for s in cbgrfns]
        # avoid division by zero (and negative page counts) for a
        # non-positive page size
        total_pages = int(math.ceil(total / float(max_lines))) if max_lines > 0 else 0
        return {
            'Head': head,
            'Items': self.add_block_items(items),
            'Total': total,
            'TotalPages': total_pages
        }