Exemplo n.º 1
0
def calculate_colls_bg(coll_args):
    """
    Calculate collocations in the background.

    This function is expected to be run either
    from Celery or from another process (via multiprocessing).

    arguments:
    coll_args -- an object providing the attributes read below
                 (subcpath, corpname, subcname, user_id, q, save,
                 samplesize, cattr, csortfn, cbgrfns, cfromw, ctow,
                 cminfreq, cminbgr, num_fetch_items)

    returns:
    a dict with keys 'data', 'processing' and 'tasks'; when the
    precalculated frequency data are missing, the dict also contains
    'attrname', 'data' holds empty 'Items' and 'Head' lists and
    'processing' is 1 if build_arf_db() returned a list (that list is
    passed along in 'tasks'), 0 otherwise

    raises:
    UnfinishedConcordanceError -- if the source concordance is not finished
    """
    cm = corplib.CorpusManager(subcpath=coll_args.subcpath)
    corp = cm.get_Corpus(coll_args.corpname, subcname=coll_args.subcname)
    try:
        # try to fetch precalculated data; if none then MissingSubCorpFreqFile
        corplib.frq_db(corp, coll_args.cattr)
        conc = get_conc(corp=corp, user_id=coll_args.user_id, q=coll_args.q,
                        fromp=0, pagesize=0, asnc=0, save=coll_args.save, samplesize=coll_args.samplesize)
        if not conc.finished():
            raise UnfinishedConcordanceError(
                _('Cannot calculate yet - source concordance not finished. Please try again later.'))
        collocs = conc.collocs(cattr=coll_args.cattr, csortfn=coll_args.csortfn, cbgrfns=coll_args.cbgrfns,
                               cfromw=coll_args.cfromw, ctow=coll_args.ctow, cminfreq=coll_args.cminfreq,
                               cminbgr=coll_args.cminbgr, max_lines=coll_args.num_fetch_items)
        # wrap each raw filter value into a list of (key, value) pairs
        for item in collocs['Items']:
            item['pfilter'] = [('q2', item['pfilter'])]
            item['nfilter'] = [('q2', item['nfilter'])]
        return dict(data=collocs, processing=0, tasks=[])
    except corplib.MissingSubCorpFreqFile as e:
        out = freq_calc.build_arf_db(e.corpus, coll_args.cattr)
        # isinstance() rather than an exact type() comparison so that
        # list subclasses are recognized as a task list too
        has_tasks = isinstance(out, list)
        return {
            'attrname': coll_args.cattr,
            'tasks': list(out) if has_tasks else [],
            'processing': 1 if has_tasks else 0,
            'data': dict(Items=[], Head=[]),
        }
Exemplo n.º 2
0
def calc_freqs_bg(args):
    """
    Calculate actual frequency data.

    arguments:
    args -- a FreqCalsArgs instance; the attributes read here are
            subcpath, corpname, subcname, user_id, minsize, q, fromp,
            pagesize, save, samplesize, fcrit, flimit, freq_sort, ml,
            ftt_include_empty, rel_mode and collator_locale

    returns:
    a dict(freqs=..., conc_size=...) where 'freqs' contains one
    xfreq_dist() result per item of args.fcrit

    raises:
    UnfinishedConcordanceError -- if the source concordance is not finished
    """

    cm = corplib.CorpusManager(subcpath=args.subcpath)
    corp = cm.get_Corpus(args.corpname, args.subcname)
    # NOTE: 'async' is a reserved keyword since Python 3.7, so it cannot be
    # used as a keyword argument name; 'asnc' is the spelling used by the
    # get_conc() call in calculate_colls_bg.
    conc = conclib.get_conc(corp=corp,
                            user_id=args.user_id,
                            minsize=args.minsize,
                            q=args.q,
                            fromp=args.fromp,
                            pagesize=args.pagesize,
                            asnc=0,
                            save=args.save,
                            samplesize=args.samplesize)
    if not conc.finished():
        raise UnfinishedConcordanceError(
            _('Cannot calculate yet - source concordance not finished. Please try again later.'))
    freqs = [
        conc.xfreq_dist(cr, args.flimit, args.freq_sort, args.ml,
                        args.ftt_include_empty, args.rel_mode,
                        args.collator_locale)
        for cr in args.fcrit
    ]
    return dict(freqs=freqs, conc_size=conc.size())