def collocs(self, cattr='-', csortfn='m', cbgrfns='mt', cfromw=-5, ctow=5, cminfreq=5, cminbgr=3,
            max_lines=0):
    """
    Build a table of collocation candidates using manatee.CollocItems.

    arguments:
    cattr -- attribute name; passed to manatee.CollocItems and used in the generated filters
    csortfn -- sort function code passed to manatee.CollocItems
    cbgrfns -- a string of one-letter statistic codes; each letter produces one
               column in 'Head' and one value in every item's 'Stats'
    cfromw, ctow -- window bounds; passed to manatee.CollocItems and used in the filters
    cminfreq, cminbgr -- thresholds passed to manatee.CollocItems
    max_lines -- maximum number of rows returned; a value <= 0 disables the limit
                 applied by this method

    returns:
    a dict with keys 'Head' (list of column descriptors) and 'Items' (list of rows;
    each row has keys 'str', 'freq', 'Stats', 'pfilter' and 'nfilter')
    """
    statdesc = {
        't': translate('T-score'),
        'm': translate('MI'),
        '3': translate('MI3'),
        'l': translate('log likelihood'),
        's': translate('min. sensitivity'),
        'p': translate('MI.log_f'),
        'r': translate('relative freq. [%]'),
        'f': translate('absolute freq.'),
        'd': translate('logDice')
    }
    colls = manatee.CollocItems(self, cattr, csortfn, cminfreq, cminbgr, cfromw, ctow, max_lines)
    # template with two remaining slots: filter type ('P' or 'N') and the escaped collocate
    qfilter = '%%s%i %i 1 [%s="%%s"]' % (cfromw, ctow, cattr)
    items = []
    while not colls.eos():
        # stop once exactly max_lines rows were collected (the former test
        # '0 < max_lines < i' let one extra row through)
        if 0 < max_lines <= len(items):
            break
        item = colls.get_item()
        escaped_item = escape(self.import_string(item))
        items.append(dict(
            str=item,
            freq=colls.get_cnt(),
            Stats=[{'s': '%.3f' % colls.get_bgr(s)} for s in cbgrfns],
            pfilter=qfilter % ('P', escaped_item),
            nfilter=qfilter % ('N', escaped_item)
        ))
        colls.next()

    # unknown statistic codes are labelled by the code itself
    head = [{'n': ''}, {'n': 'Freq', 's': 'f'}] + \
        [{'n': statdesc.get(s, s), 's': s} for s in cbgrfns]
    return dict(Head=head, Items=items)
def get_query(self):
    """
    Collect conditions from all source attributes whose names start with 'sca_'.

    The part of the name after the prefix is split on '.' into a structure name
    and an attribute name. A string value containing '|' is split into
    alternatives; a list value produces a parenthesized disjunction; an empty
    list produces no condition. Conditions of the same structure are joined
    with ' & '.

    returns:
    a list of tuples (struct, condition); strings are encoded
    to the encoding current corpus uses!
    """
    prefix = 'sca_'
    selected = []
    for name in self._attr_producer_fn(self._src_obj):
        if name.startswith(prefix):
            selected.append((name[len(prefix):], self._access_fn(self._src_obj, name)))

    by_struct = {}
    for struct_attr, value in selected:
        if type(value) in (str, unicode) and '|' in value:
            value = value.split('|')
        struct_name, attr_name = struct_attr.split('.')
        if type(value) is list:
            alternatives = ['%s="%s"' % (attr_name, l10n.escape(item)) for item in value]
            if not alternatives:
                # nothing selected for this attribute
                continue
            query = '(%s)' % ' | '.join(alternatives)
        else:
            query = '%s="%s"' % (attr_name, l10n.escape(value))
        # TODO: is the following encoding change always OK?
        query = l10n.export_string(query, to_encoding=self._corp.get_conf('ENCODING'))
        by_struct.setdefault(struct_name, []).append(query)
    return [(key, ' & '.join(conditions)) for key, conditions in by_struct.items()]
def get_query(self):
    """
    Translate 'sca_*' attributes of the source object into per-structure conditions.

    Each matching attribute name (with the 'sca_' prefix removed) must have the
    form '<struct>.<attr>'. Values are read via the configured access function.
    A string containing '|' is treated as a list of alternatives.

    returns:
    a list of tuples (struct, condition) where the condition joins all
    expressions of the structure with ' & '; strings are encoded
    to the encoding current corpus uses!
    """
    pairs = [(key[4:], self._access_fn(self._src_obj, key))
             for key in self._attr_producer_fn(self._src_obj)
             if key.startswith('sca_')]
    grouped = {}
    for key, val in pairs:
        is_text = type(val) in (str, unicode)
        if is_text and '|' in val:
            val = val.split('|')
        struct, attr = key.split('.')
        if type(val) is not list:
            cond = '%s="%s"' % (attr, l10n.escape(val))
        else:
            parts = []
            for alt in val:
                parts.append('%s="%s"' % (attr, l10n.escape(alt)))
            # an empty list of alternatives yields no condition at all
            cond = '(%s)' % ' | '.join(parts) if len(parts) > 0 else None
        if cond is None:
            continue
        # TODO: is the following encoding change always OK?
        cond = l10n.export_string(cond, to_encoding=self._corp.get_conf('ENCODING'))
        if struct not in grouped:
            grouped[struct] = []
        grouped[struct].append(cond)

    result = []
    for struct, conds in grouped.items():
        result.append((struct, ' & '.join(conds)))
    return result
def collocs(self, cattr='-', csortfn='m', cbgrfns='mt', cfromw=-5, ctow=5, cminfreq=5, cminbgr=3,
            from_idx=0, max_lines=50):
    """
    Return one page of collocation candidates along with paging information.

    arguments:
    cattr -- attribute name; passed to manatee.CollocItems and used in the generated filters
    csortfn -- sort function code passed to manatee.CollocItems
    cbgrfns -- a string of one-letter statistic codes; each letter produces one
               column in 'Head' and one value in every item's 'Stats'
    cfromw, ctow -- window bounds; passed to manatee.CollocItems and used in the filters
    cminfreq, cminbgr -- thresholds passed to manatee.CollocItems
    from_idx -- zero-based index of the first item included in the result
    max_lines -- number of items per page; for values <= 0 no items are
                 returned and 'TotalPages' is 0

    returns:
    a dict with keys 'Head' (column descriptors), 'Items' (the requested page
    processed by self.add_block_items), 'Total' (number of all items iterated)
    and 'TotalPages'
    """
    statdesc = {
        't': 'T-score',
        'm': 'MI',
        '3': 'MI3',
        'l': 'log likelihood',
        's': 'min. sensitivity',
        'p': 'MI.log_f',
        'r': 'relative freq. [%]',
        'f': 'absolute freq.',
        'd': 'logDice',
    }
    # NOTE(review): 2**29 is presumably an "effectively unlimited" item cap so that
    # the total can be counted below - confirm against the manatee API
    colls = manatee.CollocItems(self, cattr, csortfn, cminfreq, cminbgr, cfromw, ctow, 2**29)
    # template with two remaining slots: filter type ('P' or 'N') and the escaped collocate
    qfilter = '%%s%i %i 1 [%s="%%s"]' % (cfromw, ctow, cattr)
    page_end = from_idx + max_lines
    items = []
    i = 0
    # the whole result is walked (not just the page) because 'Total' needs the full count
    while not colls.eos():
        if from_idx <= i < page_end:
            item = colls.get_item()
            escaped_item = escape(self.import_string(item))
            items.append({
                'str': item,
                'freq': colls.get_cnt(),
                'Stats': [{'s': '%.3f' % colls.get_bgr(s)} for s in cbgrfns],
                'pfilter': qfilter % ('P', escaped_item),
                'nfilter': qfilter % ('N', escaped_item)
            })
        colls.next()
        i += 1

    # unknown statistic codes are labelled by the code itself
    head = [{'n': ''}, {'n': 'Freq', 's': 'f'}] \
        + [{'n': statdesc.get(s, s), 's': s} for s in cbgrfns]
    # guard against ZeroDivisionError (max_lines == 0) and negative page counts
    if max_lines > 0:
        total_pages = int(math.ceil(i / float(max_lines)))
    else:
        total_pages = 0
    return {
        'Head': head,
        'Items': self.add_block_items(items),
        'Total': i,
        'TotalPages': total_pages
    }
def collocs(self, cattr='-', csortfn='m', cbgrfns='mt', cfromw=-5, ctow=5, cminfreq=5, cminbgr=3,
            from_idx=0, max_lines=50):
    """
    Compute collocation candidates and return the requested slice of them.

    arguments:
    cattr -- attribute name; passed to manatee.CollocItems and used in the generated filters
    csortfn -- sort function code passed to manatee.CollocItems
    cbgrfns -- a string of one-letter statistic codes; one 'Head' column and
               one 'Stats' value is generated per letter
    cfromw, ctow -- window bounds; passed to manatee.CollocItems and used in the filters
    cminfreq, cminbgr -- thresholds passed to manatee.CollocItems
    from_idx -- zero-based index of the first returned item
    max_lines -- page size; a non-positive value yields no items and zero pages

    returns:
    a dict with the following keys:
    Head -- list of column descriptors
    Items -- items with index in [from_idx, from_idx + max_lines) passed
             through self.add_block_items
    Total -- number of all the items the iterator provided
    TotalPages -- Total divided by max_lines, rounded up (0 if max_lines <= 0)
    """
    statdesc = {'t': 'T-score',
                'm': 'MI',
                '3': 'MI3',
                'l': 'log likelihood',
                's': 'min. sensitivity',
                'p': 'MI.log_f',
                'r': 'relative freq. [%]',
                'f': 'absolute freq.',
                'd': 'logDice',
                }
    items = []
    # NOTE(review): the last argument (2 ** 29) looks like a very high item limit
    # used to obtain all the items - verify against the manatee documentation
    colls = manatee.CollocItems(self, cattr, csortfn, cminfreq, cminbgr, cfromw, ctow, 2 ** 29)
    # the two escaped '%%s' slots are filled per item: filter type and collocate value
    qfilter = '%%s%i %i 1 [%s="%%s"]' % (cfromw, ctow, cattr)
    total = 0
    while not colls.eos():
        # only items within the requested page are materialized; the rest is just counted
        if from_idx <= total < from_idx + max_lines:
            collocate = colls.get_item()
            filter_value = escape(self.import_string(collocate))
            items.append(
                {'str': collocate,
                 'freq': colls.get_cnt(),
                 'Stats': [{'s': '%.3f' % colls.get_bgr(s)} for s in cbgrfns],
                 'pfilter': qfilter % ('P', filter_value),
                 'nfilter': qfilter % ('N', filter_value)
                 })
        colls.next()
        total += 1

    head = [{'n': ''}, {'n': 'Freq', 's': 'f'}] \
        + [{'n': statdesc.get(s, s), 's': s} for s in cbgrfns]
    # the original expression raised ZeroDivisionError for max_lines == 0
    total_pages = int(math.ceil(total / float(max_lines))) if max_lines > 0 else 0
    return {
        'Head': head,
        'Items': self.add_block_items(items),
        'Total': total,
        'TotalPages': total_pages
    }