Example #1
File: sepgen.py Project: takada-at/sep
def cooccurence():
    logger.info('vocabrary load...')
    vocab = nltkwrapper.Vocab(context)
    ranking = vocab.vocab(300)
    stems = vocab.stemmer.getdict().items()
    logger.info('load words coocurence...')
    cooc = vocab.cooccurence(dict(ranking).keys()).items()
    cooc.sort(lambda x, y: cmp(y[1], x[1]))
    cooc = [(k0, k1, c) for ((k0, k1), c) in cooc]
    logger.info('save...')
    savecsv(os.path.join(context.dbdir(), 'ranking.csv'), ranking)
    savecsv(os.path.join(context.dbdir(), 'cooccurence.csv'), cooc)
    savecsv(os.path.join(context.dbdir(), 'stem.csv'), stems)
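savecsv() is called throughout these examples but is not part of this listing. Judging from how saveranking() writes and loadranking() reads 'ranking.csv' below, it presumably emits one tab-joined line per row; a minimal sketch under that assumption (the real helper in sepgen.py may differ):

# Hypothetical sketch of savecsv(); assumption: one tab-separated line per row,
# matching the "\t"-split format that loadranking() and loadorgword() read back.
import io


def savecsv(path, rows):
    with io.open(path, 'w') as wio:
        for row in rows:
            wio.write(u"\t".join(u"%s" % col for col in row) + u"\n")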
Example #2
File: ranking.py Project: takada-at/sep
def saveranking(ranking):
    dbdir = context.dbdir()
    with io.open(os.path.join(dbdir, 'ranking.csv'), 'w') as wio:
        for word, freq in ranking:
            wio.write(u"\t".join([word, unicode(freq)]) + u"\n")

    return ranking
Example #3
File: sepgen.py Project: takada-at/sep
def bibliocount():
    counter = biblio.count()
    items = counter.items()
    items.sort(lambda x, y: cmp(y[1], x[1]))
    items = [[author, year, title, count]
             for ((author, year, title), count) in items]
    savecsv(os.path.join(context.dbdir(), 'biblio.csv'), items)
Example #4
File: ranking.py Project: takada-at/sep
def loadranking():
    dirname = context.dbdir()
    ranking = []
    with io.open(os.path.join(dirname, 'ranking.csv')) as fio:
        for line in fio:
            word, count = line.rstrip().split("\t")
            ranking.append((word, int(count)))

    return ranking
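For reference, a hypothetical round trip through the two ranking helpers above, assuming context.dbdir() resolves to an existing, writable directory:

# Hypothetical usage of saveranking()/loadranking(); assumes context.dbdir()
# points at an existing, writable directory.
saveranking([(u"word", 42), (u"term", 7)])
print(loadranking())  # -> [(u'word', 42), (u'term', 7)]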
Example #5
File: sepgen.py Project: takada-at/sep
def preparedir():
    dirs = [
        context.datadir(),
        context.graphdir(),
        context.dbdir(),
        context.textdatadir()
    ]
    for dirname in dirs:
        if not os.path.exists(dirname):
            os.mkdir(dirname)
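preparedir() uses the check-then-create pattern; on Python 3.2+ the same effect (minus the race between the exists() check and the mkdir()) can be written with os.makedirs and exist_ok, for example:

# Python 3 alternative to the exists()/mkdir() pair above.
import os

dirs = ["data", "graph", "db", "textdata"]  # stand-ins for the context.*dir() calls
for dirname in dirs:
    # exist_ok skips the separate exists() check; note makedirs() also creates
    # missing parent directories, which os.mkdir() does not.
    os.makedirs(dirname, exist_ok=True)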
Example #6
File: ranking.py Project: takada-at/sep
def loadorgword():
    dirname = context.dbdir()
    dic = dict()
    with io.open(os.path.join(dirname, 'stem.csv')) as fio:
        for line in fio:
            stem, org = line.rstrip().split("\t")
            dic[stem] = org

    stemmer = nltkwrapper.Stem()
    stemmer.loaddata(dic)
    return stemmer
Example #7
File: graph.py Project: takada-at/sep
def load(limit=1300):
    stemmer = ranking.loadorgword()
    ranks = ranking.loadtaggedranking(stemmer)
    ranks = [(w, cnt) for w, tag, cnt in ranks]
    rankdict = dict(ranks)
    words = set(w[0] for w in ranks)
    dbdir = context.dbdir()
    edges = []
    with io.open(os.path.join(dbdir, 'cooccurence.csv')) as fio:
        for line in fio:
            word0, word1, count = line.rstrip().split("\t")
            word0 = stemmer.orgword(word0)
            word1 = stemmer.orgword(word1)
            if word0 in words and word1 in words:
                edges.append((word0, word1, int(count)))

    edges.sort(lambda x, y: cmp(y[2], x[2]))
    edges = [(w0, w1, k) for (w0, w1, k) in edges if k > limit]
    return (edges, rankdict)
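The cmp-style sorts used in load(), cooccurence() and bibliocount() are Python 2 only (list.sort() no longer accepts a comparison function and the cmp() builtin is gone in Python 3); the same descending order would be written with a key function, for example:

# Python 3 form of the descending sort on the count field.
edges = [("word0", "word1", 5), ("word0", "word2", 12)]
edges.sort(key=lambda e: e[2], reverse=True)  # highest co-occurrence count first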
Example #8
File: sepgen.py Project: takada-at/sep
def ranking():
    vocab = nltkwrapper.Vocab(context)
    ranking = vocab.vocab(300)
    stems = vocab.stemmer.getdict().items()
    savecsv(os.path.join(context.dbdir(), 'ranking.csv'), ranking)
    savecsv(os.path.join(context.dbdir(), 'stem.csv'), stems)
Example #9
File: sepgen.py Project: takada-at/sep
def graphdata():
    data = graph.load()
    savecsv(os.path.join(context.dbdir(), 'graphdata.csv'), data)
Example #10
File: sepgen.py Project: takada-at/sep
def nnranking():
    ranks = rankmod.loadnnranking()
    savecsv(os.path.join(context.dbdir(), 'nnranking.csv'), ranks)
Example #11
File: sepgen.py Project: takada-at/sep
def taggedranking():
    ranks = rankmod.loadtaggedranking()
    savecsv(os.path.join(context.dbdir(), 'taggedranking.csv'), ranks)