Пример #1
0
 def ajax_subcorp_info(self, subcname=''):
     """
     Returns size information about a subcorpus of the current
     corpus (self.args.corpname).

     arguments:
     subcname -- name of the subcorpus

     returns:
     a dictionary with keys 'subCorpusName', 'corpusSize' (formatted
     size() of the loaded object) and 'subCorpusSize' (formatted
     search_size() of the loaded object)
     """
     subcorp = self.cm.get_Corpus(self.args.corpname, subcname)
     ans = dict(subCorpusName=subcname)
     ans['corpusSize'] = format_number(subcorp.size())
     ans['subCorpusSize'] = format_number(subcorp.search_size())
     return ans
Пример #2
0
    def ajax_get_corp_details(self, request):
        """
        Collects basic information about a corpus.

        arguments:
        request -- a request object; the corpus is identified
                   by request.args['corpname']

        returns:
        a dictionary with keys 'corpname', 'description', 'size', 'attrlist',
        'structlist' and 'web_url'; if the positional attributes cannot be
        loaded (RuntimeError) then 'attrlist' holds a dictionary with a single
        'error' key instead of a list
        """
        corpname = request.args['corpname']
        corp_conf_info = plugins.get('corparch').get_corpus_info(corpname)
        corpus = self.cm.get_Corpus(corpname)
        encoding = corpus.get_conf('ENCODING')

        ans = {
            'corpname': l10n.import_string(self._canonical_corpname(corpus.get_conf('NAME')),
                                           from_encoding=encoding),
            'description': l10n.import_string(corpus.get_info(), from_encoding=encoding),
            'size': l10n.format_number(int(corpus.size())),
            'attrlist': [],
            'structlist': [],
            'web_url': corp_conf_info['web'] if corp_conf_info is not None else ''
        }
        try:
            ans['attrlist'] = [{'name': item, 'size': l10n.format_number(int(corpus.get_attr(item).id_range()))}
                               for item in corpus.get_conf('ATTRLIST').split(',')]
        except RuntimeError as e:
            # Logger.warn() is a deprecated alias of Logger.warning(); the
            # lazily formatted argument produces the same message
            logging.getLogger(__name__).warning('%s', e)
            ans['attrlist'] = {'error': _('Failed to load')}
        # NOTE: unlike the attributes above, a failure here is not caught
        ans['structlist'] = [{'name': item, 'size': l10n.format_number(int(corpus.get_struct(item).size()))}
                             for item in corpus.get_conf('STRUCTLIST').split(',')]
        return ans
Пример #3
0
    def ajax_get_corp_details(self, request):
        """
        Collects basic information about a corpus including
        its citation information.

        arguments:
        request -- a request object; the corpus is identified
                   by request.args['corpname']

        returns:
        a dictionary with keys 'corpname', 'description', 'size', 'attrlist',
        'structlist', 'web_url' and 'citation_info'; if the positional
        attributes cannot be loaded (RuntimeError) then 'attrlist' holds
        a dictionary with a single 'error' key instead of a list
        """
        corpname = request.args['corpname']
        corp_conf_info = self.get_corpus_info(corpname)
        corpus = self.cm.get_Corpus(corpname)

        # the 'web_url' item below already accepts a missing corpus info
        # (None) so the citation info must not fail in such case either
        citation_info = None
        if corp_conf_info is not None:
            citation_info = corp_conf_info.get('citation_info', None)
        citation_info = citation_info.to_dict() if citation_info else {}

        import_str = partial(l10n.import_string, from_encoding=corpus.get_conf('ENCODING'))

        # configured NAME is preferred; the corpus identifier is a fallback
        corpus_name = corpus.get_conf('NAME') or corpus.corpname

        ans = {
            'corpname': import_str(corpus_name),
            'description': import_str(corpus.get_info()),
            'size': l10n.format_number(int(corpus.size())),
            'attrlist': [],
            'structlist': [],
            'web_url': corp_conf_info['web'] if corp_conf_info is not None else '',
            'citation_info': citation_info
        }
        try:
            ans['attrlist'] = [{'name': item, 'size': l10n.format_number(int(corpus.get_attr(item).id_range()))}
                               for item in corpus.get_conf('ATTRLIST').split(',')]
        except RuntimeError as e:
            # Logger.warn() is a deprecated alias of Logger.warning(); the
            # lazily formatted argument produces the same message
            logging.getLogger(__name__).warning('%s', e)
            ans['attrlist'] = {'error': _('Failed to load')}
        # NOTE: unlike the attributes above, a failure here is not caught
        ans['structlist'] = [{'name': item, 'size': l10n.format_number(int(corpus.get_struct(item).size()))}
                             for item in corpus.get_conf('STRUCTLIST').split(',')]
        return ans
Пример #4
0
 def ajax_subcorp_info(self, subcname=''):
     """
     Returns information about a subcorpus of the current
     corpus (self.args.corpname).

     arguments:
     subcname -- name of the subcorpus

     returns:
     a dictionary with keys 'corpusName', 'subCorpusName', 'corpusSize',
     'subCorpusSize' and 'extended_info'; the last one is filled in
     only if the 'subc_restore' plug-in is available and provides
     a non-empty value
     """
     subcorp = self.cm.get_Corpus(self.args.corpname, subcname)
     extended = {}
     ans = dict(
         corpusName=self._canonical_corpname(self.args.corpname),
         subCorpusName=subcname,
         corpusSize=format_number(subcorp.size()),
         subCorpusSize=format_number(subcorp.search_size()),
         extended_info=extended)
     if not plugins.has_plugin('subc_restore'):
         return ans
     subc_restore = plugins.get('subc_restore')
     restore_info = subc_restore.get_info(self._session_get('user', 'id'),
                                          self.args.corpname, subcname)
     if restore_info:
         extended.update(restore_info)
     return ans
Пример #5
0
 def format_data_types(data):
     """
     Formats numeric values of a dictionary in place.

     Values of the exact type str containing only digits are converted
     to int first; values of the exact type int or float are then
     replaced by the result of l10n.format_number. Anything which is
     not exactly a dict is returned untouched.

     arguments:
     data -- a value to be processed

     returns:
     the same object as passed in
     """
     if type(data) is not dict:
         return data
     for key in list(data):
         value = data[key]
         if type(value) is str and value.isdigit():
             value = int(value)
             data[key] = value
         # exact type tests are intentional here (e.g. bool is skipped)
         if type(value) in (int, float):
             data[key] = l10n.format_number(value)
     return data
Пример #6
0
 def format_data_types(data):
     """
     Localizes numbers stored as values of the passed dictionary.

     A value which is exactly of type str and consists of digits only
     is turned into int; a value which is exactly of type int or float
     is replaced by l10n.format_number(value). The dictionary is
     modified in place. Non-dict arguments are returned as they are.

     arguments:
     data -- a dictionary (or any other value)

     returns:
     the passed object
     """
     if type(data) is dict:
         for name, item in list(data.items()):
             if type(item) is str and item.isdigit():
                 item = data[name] = int(item)
             item_type = type(item)
             if item_type is int or item_type is float:
                 data[name] = l10n.format_number(item)
     return data
Пример #7
0
    def ajax_get_corp_details(self, request):
        """
        Collects basic information about a corpus including
        its citation information.

        arguments:
        request -- a request object; the corpus is identified
                   by request.args['corpname']

        returns:
        a dictionary with keys 'corpname', 'description', 'size', 'attrlist',
        'structlist', 'web_url' and 'citation_info'; if the positional
        attributes cannot be loaded (RuntimeError) then 'attrlist' holds
        a dictionary with a single 'error' key instead of a list
        """
        corpname = request.args['corpname']
        corp_conf_info = plugins.get('corparch').get_corpus_info(corpname)
        corpus = self.cm.get_Corpus(corpname)

        # the 'web_url' item below already accepts a missing corpus info
        # (None) so the citation info must not fail in such case either
        citation_info = None
        if corp_conf_info is not None:
            citation_info = corp_conf_info.get('citation_info', None)
        citation_info = citation_info.to_dict() if citation_info else {}

        import_str = partial(l10n.import_string,
                             from_encoding=corpus.get_conf('ENCODING'))

        # configured NAME is preferred; the canonical corpus identifier
        # is a fallback
        corpus_name = corpus.get_conf('NAME')
        if not corpus_name:
            corpus_name = self._canonical_corpname(corpus.corpname)

        ans = {
            'corpname': import_str(corpus_name),
            'description': import_str(corpus.get_info()),
            'size': l10n.format_number(int(corpus.size())),
            'attrlist': [],
            'structlist': [],
            'web_url':
            corp_conf_info['web'] if corp_conf_info is not None else '',
            'citation_info': citation_info
        }
        try:
            ans['attrlist'] = [{
                'name':
                item,
                'size':
                l10n.format_number(int(corpus.get_attr(item).id_range()))
            } for item in corpus.get_conf('ATTRLIST').split(',')]
        except RuntimeError as e:
            # Logger.warn() is a deprecated alias of Logger.warning(); the
            # lazily formatted argument produces the same message
            logging.getLogger(__name__).warning('%s', e)
            ans['attrlist'] = {'error': _('Failed to load')}
        # NOTE: unlike the attributes above, a failure here is not caught
        ans['structlist'] = [{
            'name':
            item,
            'size':
            l10n.format_number(int(corpus.get_struct(item).size()))
        } for item in corpus.get_conf('STRUCTLIST').split(',')]
        return ans
Пример #8
0
    def export_with_norms(self, subcorpattrs='', format_num=True, ret_nums=True, subcnorm='tokens'):
        """
        Returns a text types table containing also an information about
        total occurrences of respective attribute values.

        See corplib.texttype_values for arguments and returned value

        arguments:
        subcorpattrs -- structural attributes separated by ',' or '|'; if empty
                        then the SUBCORPATTRS and then the FULLREF corpus
                        configuration value is used
        format_num -- if True then computed norms are passed through l10n.format_number
        ret_nums -- if True then a norm ('xcnt') is attached to each attribute value
        subcnorm -- a norm type passed to CachedStructNormsCalc

        returns:
        a dictionary with keys 'bib_attr', 'Blocks' and 'Normslist'

        raises:
        TextTypesException -- if no attribute configuration is found
        """
        ans = {}
        if not subcorpattrs:
            subcorpattrs = self._corp.get_conf('SUBCORPATTRS') or self._corp.get_conf('FULLREF')
        if not subcorpattrs or subcorpattrs == '#':
            raise TextTypesException(
                _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'))

        corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
        maxlistsize = settings.get_int('global', 'max_attr_list_size')
        # if 'live_attributes' are installed then always shrink bibliographical
        # entries even if their count is < maxlistsize
        subcorp_attr_list = re.split(r'\s*[,|]\s*', subcorpattrs)

        if plugins.has_plugin('live_attributes'):
            bib_attr = corpus_info['metadata']['label_attr']
            list_none = (bib_attr, )
            if bib_attr and bib_attr not in subcorp_attr_list:  # if bib type is not in subcorpattrs
                # we add it there (to a copy - subcorp_attr_list itself is still
                # needed unchanged below); we ignore NoSkE '|' vs. ',' stuff
                # deliberately here
                subcorpattrs = '|'.join(subcorp_attr_list + [bib_attr])
        else:
            bib_attr = None
            list_none = ()
        ans['bib_attr'] = bib_attr

        tt = self._tt_cache.get_values(corp=self._corp, subcorpattrs=subcorpattrs, maxlistsize=maxlistsize,
                                       shrink_list=list_none, collator_locale=corpus_info.collator_locale)
        self._add_tt_custom_metadata(tt)

        ans['Blocks'] = tt
        if ret_nums:
            db = plugins.get('db')
            struct_calc = collections.OrderedDict()
            for item in subcorp_attr_list:
                k = item.split('.')[0]
                struct_calc[k] = CachedStructNormsCalc(self._corp, k, subcnorm, db=db)
            # plain nested iteration avoids repeated list concatenation
            for block in tt:
                for col in block['Line']:
                    if 'textboxlength' in col:
                        continue
                    structname, attrname = col['name'].split('.')
                    for val in col['Values']:
                        v = struct_calc[structname].compute_norm(attrname, val['v'])
                        val['xcnt'] = l10n.format_number(v) if format_num else v
            # the first structure; subcorp_attr_list is never empty here
            # (re.split always returns at least one item) and next(iter(...))
            # works also where keys() is not subscriptable
            ans['Normslist'] = self._get_normslist(next(iter(struct_calc)))
        else:
            ans['Normslist'] = []
        return ans
Пример #9
0
 def ajax_subcorp_info(self, subcname=''):
     """
     Returns information about a subcorpus of the current
     corpus (self.args.corpname).

     arguments:
     subcname -- name of the subcorpus

     returns:
     a dictionary with keys corpusId, corpusName, subCorpusName,
     origSubCorpusName, corpusSize, subCorpusSize, created, description
     and extended_info; extended_info is filled in only if the
     SUBC_RESTORE plug-in exists and provides a non-empty value
     """
     corpname = self.args.corpname
     subcorp = self.cm.get_Corpus(corpname, subcname=subcname)
     extended = {}
     ans = {
         'corpusId': corpname,
         'corpusName': self._human_readable_corpname(),
         'subCorpusName': subcname,
         # published subcorpora report their original name
         'origSubCorpusName': subcorp.orig_subcname if subcorp.is_published else subcname,
         'corpusSize': format_number(subcorp.size()),
         'subCorpusSize': format_number(subcorp.search_size()),
         'created': time.strftime(l10n.datetime_formatting(),
                                  subcorp.created.timetuple()),
         'description': subcorp.description,
         'extended_info': extended
     }
     if not plugins.runtime.SUBC_RESTORE.exists:
         return ans
     with plugins.runtime.SUBC_RESTORE as sr:
         restore_info = sr.get_info(self.session_get('user', 'id'),
                                    corpname, subcname)
         if restore_info:
             extended.update(restore_info)
     return ans
Пример #10
0
 def ajax_subcorp_info(self, subcname=''):
     """
     Returns information about a subcorpus of the current
     corpus (self.args.corpname).

     arguments:
     subcname -- name of the subcorpus

     returns:
     a dictionary with keys 'corpusName', 'subCorpusName', 'corpusSize',
     'subCorpusSize', 'created' and 'extended_info'; the last one is
     filled in only if the SUBC_RESTORE plug-in exists and provides
     a non-empty value
     """
     corpname = self.args.corpname
     subcorp = self.cm.get_Corpus(corpname, subcname)
     extended = {}
     ans = dict(
         corpusName=self._canonical_corpname(corpname),
         subCorpusName=subcname,
         corpusSize=format_number(subcorp.size()),
         subCorpusSize=format_number(subcorp.search_size()),
         created=time.strftime(l10n.datetime_formatting(),
                               subcorp.created.timetuple()),
         extended_info=extended)
     if not plugins.runtime.SUBC_RESTORE.exists:
         return ans
     with plugins.runtime.SUBC_RESTORE as sr:
         restore_info = sr.get_info(self.session_get('user', 'id'),
                                    corpname, subcname)
         if restore_info:
             extended.update(restore_info)
     return ans
Пример #11
0
    def export_with_norms(self,
                          subcorpattrs='',
                          format_num=True,
                          ret_nums=True,
                          subcnorm='tokens'):
        """
        Returns a text types table containing also an information about
        total occurrences of respective attribute values.

        See corplib.texttype_values for arguments and returned value

        arguments:
        subcorpattrs -- structural attributes separated by ',' or '|'; if empty
                        then the SUBCORPATTRS and then the FULLREF corpus
                        configuration value is used
        format_num -- if True then computed norms are passed through
                      l10n.format_number
        ret_nums -- if True then a norm ('xcnt') is attached to each
                    attribute value
        subcnorm -- a norm type passed to CachedStructNormsCalc

        returns:
        a dictionary with keys 'bib_attr', 'Blocks' and 'Normslist'

        raises:
        TextTypesException -- if no attribute configuration is found
        """
        ans = {}
        if not subcorpattrs:
            subcorpattrs = (self._corp.get_conf('SUBCORPATTRS') or
                            self._corp.get_conf('FULLREF'))
        if not subcorpattrs or subcorpattrs == '#':
            raise TextTypesException(
                _('Missing display configuration of structural attributes (SUBCORPATTRS or FULLREF).'
                  ))

        corpus_info = plugins.get('corparch').get_corpus_info(self._corpname)
        maxlistsize = settings.get_int('global', 'max_attr_list_size')
        # if 'live_attributes' are installed then always shrink bibliographical
        # entries even if their count is < maxlistsize
        subcorp_attr_list = re.split(r'\s*[,|]\s*', subcorpattrs)

        if plugins.has_plugin('live_attributes'):
            bib_attr = corpus_info['metadata']['label_attr']
            list_none = (bib_attr, )
            # if bib type is not in subcorpattrs we add it there (to a copy -
            # subcorp_attr_list itself is still needed unchanged below);
            # we ignore NoSkE '|' vs. ',' stuff deliberately here
            if bib_attr and bib_attr not in subcorp_attr_list:
                subcorpattrs = '|'.join(subcorp_attr_list + [bib_attr])
        else:
            bib_attr = None
            list_none = ()
        ans['bib_attr'] = bib_attr

        tt = self._tt_cache.get_values(
            corp=self._corp,
            subcorpattrs=subcorpattrs,
            maxlistsize=maxlistsize,
            shrink_list=list_none,
            collator_locale=corpus_info.collator_locale)
        self._add_tt_custom_metadata(tt)

        ans['Blocks'] = tt
        if ret_nums:
            db = plugins.get('db')
            struct_calc = collections.OrderedDict()
            for item in subcorp_attr_list:
                k = item.split('.')[0]
                struct_calc[k] = CachedStructNormsCalc(self._corp,
                                                       k,
                                                       subcnorm,
                                                       db=db)
            # plain nested iteration avoids repeated list concatenation
            for block in tt:
                for col in block['Line']:
                    if 'textboxlength' in col:
                        continue
                    structname, attrname = col['name'].split('.')
                    for val in col['Values']:
                        v = struct_calc[structname].compute_norm(
                            attrname, val['v'])
                        val['xcnt'] = l10n.format_number(
                            v) if format_num else v
            # the first structure; subcorp_attr_list is never empty here
            # (re.split always returns at least one item) and next(iter(...))
            # works also where keys() is not subscriptable
            ans['Normslist'] = self._get_normslist(next(iter(struct_calc)))
        else:
            ans['Normslist'] = []
        return ans
Пример #12
0
    def get_attr_values(self, corpus, attr_map, aligned_corpora=None):
        """
        Finds all the available values of remaining attributes according to the
        provided attr_map and aligned_corpora

        arguments:
        corpus -- manatee.corpus object
        attr_map -- a dictionary of attributes and values as selected by a user
        aligned_corpora -- a list/tuple of corpora names aligned to base one (the 'corpus' argument);
                           None (the default) means no aligned corpora

        returns:
        a dictionary containing matching attributes and values along with
        the 'poscount' (formatted number) and 'aligned' (the aligned corpora) items
        """
        if aligned_corpora is None:
            # the default value must not break the iteration below
            aligned_corpora = []

        corpname = vanilla_corpname(corpus.corpname)
        corpus_info = self.corparch.get_corpus_info(corpname)
        bib_label = LiveAttributes.import_key(corpus_info.metadata.label_attr)
        bib_id = LiveAttributes.import_key(corpus_info.metadata.id_attr)
        attrs = self._get_subcorp_attrs(corpus)

        if bib_label and bib_label not in attrs:
            attrs.append(bib_label)

        # attributes with fixed (non-range) user selection are not searched for
        srch_attrs = set(attrs) - set(
            self.import_key(k)
            for k in attr_map.keys() if type(attr_map[k]) is not dict)
        srch_attrs.add('poscount')

        # hidden attributes are selected from the database but not exported
        hidden_attrs = set()
        if bib_id is not None and bib_id not in srch_attrs:
            hidden_attrs.add(bib_id)
        if not bib_id:
            hidden_attrs.add('id')

        selected_attrs = tuple(srch_attrs.union(hidden_attrs))

        # a map [db_col_name]=>[db_col_idx]
        srch_attr_map = dict(
            (name, idx) for idx, name in enumerate(selected_attrs))

        attr_items = AttrArgs(attr_map, self.empty_val_placeholder)
        where_sql, where_values = attr_items.export_sql('t1', corpname)

        # conditions are collected as a list so an empty base condition
        # cannot produce a clause starting with 'AND'
        where_items = [where_sql] if where_sql else []
        join_sql = []
        for i, aligned_name in enumerate(aligned_corpora, 2):
            join_sql.append('JOIN item AS t%d ON t1.item_id = t%d.item_id' %
                            (i, i))
            where_items.append('t%d.corpus_id = ?' % i)
            where_values.append(aligned_name)

        sql_template = "SELECT DISTINCT %s FROM item AS t1 %s " \
                       % (', '.join(self.apply_prefix(selected_attrs, 't1')), ' '.join(join_sql))
        if where_items:
            sql_template += "WHERE %s" % ' AND '.join(where_items)

        # already selected items are part of the answer; no need to fetch them from db
        ans = dict((self.import_key(k), v) for k, v in attr_map.items())
        range_attrs = set()

        for attr in ans.keys():
            if type(ans[attr]) is dict:
                # currently we throw away the range and load all the stuff
                ans[attr] = set()
                range_attrs.add(attr)

        for attr in srch_attrs:
            if attr in ('poscount', ):
                ans[attr] = 0
            else:
                ans[attr] = set()

        poscounts = defaultdict(lambda: defaultdict(lambda: 0))
        max_visible_chars = self.calc_max_attr_val_visible_chars(corpus_info)

        for row in self.db(corpname).execute(sql_template,
                                             *where_values).fetchall():
            for attr in selected_attrs:
                v = row[srch_attr_map[attr]]
                if v is not None and attr not in hidden_attrs:
                    attr_val = None
                    if attr == bib_label:
                        # NOTE(review): this lookup expects bib_id to be among
                        # the selected columns whenever bib_label is - confirm
                        attr_val = (self.shorten_value(
                            unicode(v), length=max_visible_chars),
                                    row[srch_attr_map[bib_id]], unicode(v))
                    elif type(ans[attr]) is set:
                        attr_val = (self.shorten_value(
                            unicode(v), length=max_visible_chars), v, v)
                    elif type(ans[attr]) is int:
                        ans[attr] += int(v)

                    if attr_val is not None:
                        poscounts[attr][attr_val] += row['poscount']

        # here we append position count information to the respective items
        for attr, v in poscounts.items():
            for k, c in v.items():
                ans[attr].add(k + (l10n.format_number(c), ))
        # released at once; removing items while iterating over items()
        # is not safe where items() returns a view
        poscounts.clear()

        exported = {}
        collator_locale = corpus_info.collator_locale

        for k in ans.keys():
            if type(ans[k]) is set:
                if len(ans[k]) <= self.max_attr_list_size or k in range_attrs:
                    out_data = l10n.sort(ans[k],
                                         collator_locale,
                                         key=lambda t: t[0])
                    if k != bib_label:
                        out_data = tuple(out_data)
                    exported[self.export_key(k)] = out_data
                else:
                    # too many values - only their count is exported
                    exported[self.export_key(k)] = {'length': len(ans[k])}

            else:
                exported[self.export_key(k)] = ans[k]
        exported['poscount'] = l10n.format_number(exported['poscount'])
        exported['aligned'] = aligned_corpora
        return exported
Пример #13
0
    def get_attr_values(self, corpus, attr_map, aligned_corpora=None):
        """
        Finds all the available values of remaining attributes according to the
        provided attr_map and aligned_corpora

        arguments:
        corpus -- manatee.corpus object
        attr_map -- a dictionary of attributes and values as selected by a user
        aligned_corpora -- a list/tuple of corpora names aligned to base one (the 'corpus' argument);
                           None (the default) means no aligned corpora

        returns:
        a dictionary containing matching attributes and values along with
        the 'poscount' (formatted number) and 'aligned' (the aligned corpora) items
        """
        if aligned_corpora is None:
            # the default value must not break the iteration below
            aligned_corpora = []

        corpname = vanilla_corpname(corpus.corpname)
        corpus_info = self.corparch.get_corpus_info(corpname)
        bib_label = LiveAttributes.import_key(corpus_info.metadata.label_attr)
        bib_id = LiveAttributes.import_key(corpus_info.metadata.id_attr)
        attrs = self._get_subcorp_attrs(corpus)

        if bib_label and bib_label not in attrs:
            attrs.append(bib_label)

        # attributes with fixed (non-range) user selection are not searched for
        srch_attrs = set(attrs) - set(self.import_key(k)
                                      for k in attr_map.keys() if type(attr_map[k]) is not dict)
        srch_attrs.add('poscount')

        # hidden attributes are selected from the database but not exported
        hidden_attrs = set()
        if bib_id is not None and bib_id not in srch_attrs:
            hidden_attrs.add(bib_id)
        if not bib_id:
            hidden_attrs.add('id')

        selected_attrs = tuple(srch_attrs.union(hidden_attrs))

        # a map [db_col_name]=>[db_col_idx]
        srch_attr_map = dict((name, idx) for idx, name in enumerate(selected_attrs))

        attr_items = AttrArgs(attr_map, self.empty_val_placeholder)
        where_sql, where_values = attr_items.export_sql('t1', corpname)

        # conditions are collected as a list so an empty base condition
        # cannot produce a clause starting with 'AND'
        where_items = [where_sql] if where_sql else []
        join_sql = []
        for i, aligned_name in enumerate(aligned_corpora, 2):
            join_sql.append('JOIN item AS t%d ON t1.item_id = t%d.item_id' % (i, i))
            where_items.append('t%d.corpus_id = ?' % i)
            where_values.append(aligned_name)

        sql_template = "SELECT DISTINCT %s FROM item AS t1 %s " \
                       % (', '.join(self.apply_prefix(selected_attrs, 't1')), ' '.join(join_sql))
        if where_items:
            sql_template += "WHERE %s" % ' AND '.join(where_items)

        # already selected items are part of the answer; no need to fetch them from db
        ans = dict((self.import_key(k), v) for k, v in attr_map.items())
        range_attrs = set()

        for attr in ans.keys():
            if type(ans[attr]) is dict:
                ans[attr] = set()   # currently we throw away the range and load all the stuff
                range_attrs.add(attr)

        for attr in srch_attrs:
            if attr in ('poscount',):
                ans[attr] = 0
            else:
                ans[attr] = set()

        poscounts = defaultdict(lambda: defaultdict(lambda: 0))
        max_visible_chars = self.calc_max_attr_val_visible_chars(corpus_info)

        for row in self.db(corpname).execute(sql_template, *where_values).fetchall():
            for attr in selected_attrs:
                v = row[srch_attr_map[attr]]
                if v is not None and attr not in hidden_attrs:
                    attr_val = None
                    if attr == bib_label:
                        # NOTE(review): this lookup expects bib_id to be among
                        # the selected columns whenever bib_label is - confirm
                        attr_val = (self.shorten_value(unicode(v), length=max_visible_chars),
                                    row[srch_attr_map[bib_id]], unicode(v))
                    elif type(ans[attr]) is set:
                        attr_val = (self.shorten_value(unicode(v), length=max_visible_chars), v, v)
                    elif type(ans[attr]) is int:
                        ans[attr] += int(v)

                    if attr_val is not None:
                        poscounts[attr][attr_val] += row['poscount']

        # here we append position count information to the respective items
        for attr, v in poscounts.items():
            for k, c in v.items():
                ans[attr].add(k + (l10n.format_number(c),))
        # released at once; removing items while iterating over items()
        # is not safe where items() returns a view
        poscounts.clear()

        exported = {}
        collator_locale = corpus_info.collator_locale

        for k in ans.keys():
            if type(ans[k]) is set:
                if len(ans[k]) <= self.max_attr_list_size or k in range_attrs:
                    out_data = l10n.sort(ans[k], collator_locale, key=lambda t: t[0])
                    if k != bib_label:
                        out_data = tuple(out_data)
                    exported[self.export_key(k)] = out_data
                else:
                    # too many values - only their count is exported
                    exported[self.export_key(k)] = {'length': len(ans[k])}

            else:
                exported[self.export_key(k)] = ans[k]
        exported['poscount'] = l10n.format_number(exported['poscount'])
        exported['aligned'] = aligned_corpora
        return exported
Пример #14
0
 def filter(self, val, **kw):
     """
     Formats a truthy value via format_number using the '%01.2f' mask;
     a falsy value (0, None, '' etc.) is returned as str(val).

     arguments:
     val -- a value to be formatted
     kw -- additional keyword arguments (not used)
     """
     if not val:
         return str(val)
     return format_number(val, mask='%01.2f')
Пример #15
0
 def filter(self, val, **kw):
     """
     Formats a truthy value via format_number; a falsy value
     (0, None, '' etc.) is returned as str(val).

     arguments:
     val -- a value to be formatted
     kw -- additional keyword arguments (not used)
     """
     return format_number(val) if val else str(val)
Пример #16
0
 def filter(self, val, **kw):
     """
     Returns the value formatted by format_number with the '%01.2f'
     mask; a falsy value (0, None, '' etc.) is just converted by str().

     arguments:
     val -- a value to be formatted
     kw -- additional keyword arguments (not used)
     """
     return format_number(val, mask='%01.2f') if val else str(val)
Пример #17
0
 def filter(self, val, **kw):
     """
     Returns the value formatted by format_number; a falsy value
     (0, None, '' etc.) is just converted by str().

     arguments:
     val -- a value to be formatted
     kw -- additional keyword arguments (not used)
     """
     if not val:
         return str(val)
     return format_number(val)