示例#1
0
def ngram(wordex, period, ddk=None):
    """Look up n-gram data for one word or a two-word phrase.

    When ``wordex`` contains a space, its first two whitespace-separated
    tokens are sent to ``nb.bigram``; otherwise the whole string is sent
    to ``nb.unigram``.

    Args:
        wordex: Search expression: a single word, or words separated by
            spaces (only the first two are used).
        period: Forwarded unchanged as the ``period`` argument.
        ddk: Forwarded unchanged as the ``ddk`` argument.

    Returns:
        Whatever ``nb.bigram`` or ``nb.unigram`` returns.
    """
    # Read the actual parameter; there is no local or argument called `word`.
    if " " in wordex:
        bigram = wordex.split()[:2]
        res = nb.bigram(first=bigram[0],
                        second=bigram[1],
                        ddk=ddk,
                        period=period)
    else:
        res = nb.unigram(wordex, period=period, ddk=ddk)
    return res
示例#2
0
def sumword(words, period, media='bok'):
    """Sum the unigram results of several comma-separated words.

    Args:
        words: Comma-separated words; whitespace around each is stripped.
        period: Forwarded unchanged to ``nb.unigram`` as ``period``.
        media: Forwarded unchanged to ``nb.unigram`` as ``media``.

    Returns:
        A pandas Series named ``"tot"``: the row-wise sum over the
        per-word ``nb.unigram`` results concatenated side by side.
    """
    wordlist = [x.strip() for x in words.split(',')]
    # An empty entry means a trailing comma or consecutive commas; in that
    # case the comma itself is counted as a word.
    if '' in wordlist:
        wordlist = [','] + [y for y in wordlist if y != '']
    ref = pd.concat(
        [nb.unigram(w, media=media, period=period) for w in wordlist],
        axis=1).sum(axis=1)
    # sum(axis=1) produces a Series, which has no columns to rename; the
    # label has to go on the Series name instead.
    ref.name = "tot"
    return ref
示例#3
0
def ngram(word, ddk, subject, period):
    """Look up n-gram data for one word or a two-word phrase.

    A ``word`` without a space goes to ``nb.unigram``. Otherwise its first
    two whitespace-separated tokens go to ``nb.bigram``. ``ddk`` and
    ``period`` are forwarded under their own names and ``subject`` is
    forwarded as ``topic``.

    Returns:
        Whatever ``nb.unigram`` or ``nb.bigram`` returns.
    """
    # Keyword arguments common to both lookups.
    filters = dict(ddk=ddk, topic=subject, period=period)

    if " " not in word:
        return nb.unigram(word, **filters)

    tokens = word.split()[:2]
    return nb.bigram(first=tokens[0], second=tokens[1], **filters)
示例#4
0
def sumword(words, period, media='bok', lang='nob'):
    """Sum the unigram results of several comma-separated words.

    Args:
        words: Comma-separated words; whitespace around each is stripped.
        period: Forwarded unchanged to ``nb.unigram`` as ``period``.
        media: Forwarded unchanged to ``nb.unigram`` as ``media``.
        lang: Forwarded unchanged to ``nb.unigram`` as ``lang``.

    Returns:
        The row-wise sum over the per-word results, with its index
        converted by ``pd.to_datetime(..., format='%Y')``.
    """
    terms = [part.strip() for part in words.split(',')]

    # A blank term comes from a trailing or doubled comma; in that case the
    # comma itself is counted and the blanks are dropped.
    if '' in terms:
        terms = [','] + [term for term in terms if term != '']

    per_word = [
        nb.unigram(term, media=media, period=period, lang=lang)
        for term in terms
    ]
    total = pd.concat(per_word, axis=1).sum(axis=1)
    total.index = pd.to_datetime(total.index, format='%Y')
    return total
示例#5
0
def ngavis(word, period):
    """Build an n-gram frame for ``word`` with ``media='avis'``.

    When ``word`` contains a space, its first two whitespace-separated
    tokens are looked up with ``nb.bigram``; otherwise ``word`` is looked
    up with ``nb.unigram``. The result is passed to ``nb.frame`` together
    with ``word``.

    Args:
        word: A single word, or words separated by spaces (only the first
            two are used).
        period: Forwarded unchanged as the ``period`` argument.

    Returns:
        The value returned by ``nb.frame``, or an empty ``pd.DataFrame``
        if anything in the lookup raised an ``Exception``.
    """
    try:
        if " " in word:
            bigram = word.split()[:2]
            res = nb.frame(
                nb.bigram(first=bigram[0],
                          second=bigram[1],
                          period=period,
                          media='avis'), word)
        else:
            res = nb.frame(nb.unigram(word, period=period, media='avis'), word)
    except Exception:
        # Best effort: a failed lookup yields an empty frame. Only Exception
        # is caught so KeyboardInterrupt / SystemExit still propagate.
        res = pd.DataFrame()
    return res
示例#6
0
def ngbok(word, period, ddk=None, lang='nob'):
    """Build an n-gram frame for ``word`` with ``media='bok'``.

    When ``word`` contains a space, its first two whitespace-separated
    tokens are looked up with ``nb.bigram``; otherwise ``word`` is looked
    up with ``nb.unigram``. The result is passed to ``nb.frame`` together
    with ``word``.

    Args:
        word: A single word, or words separated by spaces (only the first
            two are used).
        period: Forwarded unchanged as the ``period`` argument.
        ddk: Forwarded unchanged as the ``ddk`` argument.
        lang: Forwarded unchanged as the ``lang`` argument.

    Returns:
        The value returned by ``nb.frame``, or an empty ``pd.DataFrame``
        if anything in the lookup raised an ``Exception``.
    """
    try:
        if " " in word:
            bigram = word.split()[:2]
            res = nb.frame(
                nb.bigram(first=bigram[0],
                          second=bigram[1],
                          ddk=ddk,
                          period=period,
                          media='bok',
                          lang=lang), word)
        else:
            res = nb.frame(
                nb.unigram(word,
                           period=period,
                           ddk=ddk,
                           media='bok',
                           lang=lang), word)
    except Exception:
        # Best effort: a failed lookup yields an empty frame. Only Exception
        # is caught so KeyboardInterrupt / SystemExit still propagate.
        res = pd.DataFrame()
    return res
示例#7
0
def ngavis(x, period):
    """Build a unigram frame for ``x`` with ``media='avis'``.

    Args:
        x: Word passed to ``nb.unigram`` and then to ``nb.frame``.
        period: Forwarded positionally as the second argument of
            ``nb.unigram``.

    Returns:
        The value returned by ``nb.frame``, or an empty ``pd.DataFrame``
        if the lookup raised an ``Exception``.
    """
    try:
        r = nb.frame(nb.unigram(x, period, media='avis'), x)
    except Exception:
        # Best effort: a failed lookup yields an empty frame. Only Exception
        # is caught so KeyboardInterrupt / SystemExit still propagate.
        r = pd.DataFrame()
    return r
示例#8
0
def ngbok(x, period, ddk=None):
    """Build a unigram frame for ``x`` with ``media='bok'``.

    Args:
        x: Word passed to ``nb.unigram`` and then to ``nb.frame``.
        period: Forwarded positionally as the second argument of
            ``nb.unigram``.
        ddk: Forwarded unchanged as the ``ddk`` argument.

    Returns:
        The value returned by ``nb.frame``, or an empty ``pd.DataFrame``
        if the lookup raised an ``Exception``.
    """
    try:
        r = nb.frame(nb.unigram(x, period, media='bok', ddk=ddk), x)
    except Exception:
        # Best effort: a failed lookup yields an empty frame. Only Exception
        # is caught so KeyboardInterrupt / SystemExit still propagate.
        r = pd.DataFrame()
    return r