示例#1
0
def fill_redis():
    """Load tokenized sentences and sentence links into Redis.

    Sentences are read from ``sentences.csv``, one tab-separated
    ``id, lang, sentence`` record per line.  Each sentence is passed to
    ``pyplus1.lang_parse``; when that yields tokens, they are joined with
    commas and stored in the Redis hash named after the language, under
    the integer sentence id.

    Links are read from ``data/links.csv``, one tab-separated
    ``lang, id, lang, id`` record per line.  Each link is added to the
    Redis set ``"<a>-<b>"`` (the two language codes in sorted order) as a
    single integer: the id belonging to the first language of the set
    name occupies the low bits and the other id is shifted left by 32.
    """
    redisdb = redis.Redis(REDIS_SERVER, REDIS_PORT)

    # Context managers guarantee the files are closed even if a line
    # fails to parse.
    with open("sentences.csv") as senf:
        for line in senf:
            sen_id, lang, sen = line.strip().split('\t')

            token = pyplus1.lang_parse(lang, sen)
            if token:
                tokenbuf = ','.join(token)
                redisdb.hset(lang, int(sen_id), tokenbuf)

    with open("data/links.csv") as linkf:
        for line in linkf:
            # Strip first so the last field does not carry the newline.
            nlang, nid, tlang, tid = line.strip().split('\t')

            # The ids arrive as text; the bit packing below needs integers
            # (shifting a str raises TypeError).
            nid = int(nid)
            tid = int(tid)

            if nlang < tlang:  # nlang sorts before tlang
                merged = nid | (tid << 32)
                setname = "%s-%s" % (nlang, tlang)
            else:
                merged = tid | (nid << 32)
                setname = "%s-%s" % (tlang, nlang)

            # NOTE(review): packing assumes the low id fits in 32 bits;
            # confirm against the id range of the data set.
            redisdb.sadd(setname, merged)
示例#2
0
def fill_redis():
    """Populate Redis with tokenized sentences and packed sentence links.

    Step 1 reads ``sentences.csv`` (tab-separated ``id, lang, sentence``
    per line).  Every sentence for which ``pyplus1.lang_parse`` returns
    tokens is stored as a comma-joined string in the hash named ``lang``,
    keyed by the integer id.

    Step 2 reads ``data/links.csv`` (tab-separated ``lang, id, lang, id``
    per line).  For each link the two language codes are ordered, the set
    name is ``"<first>-<second>"``, and the stored member is one integer
    holding the first language's id in the low bits and the second
    language's id shifted left by 32.
    """
    redisdb = redis.Redis(REDIS_SERVER, REDIS_PORT)

    # ``with`` closes each file deterministically, including on errors.
    with open("sentences.csv") as senf:
        for line in senf:
            sen_id, lang, sen = line.strip().split('\t')

            token = pyplus1.lang_parse(lang, sen)
            if token:
                tokenbuf = ','.join(token)
                redisdb.hset(lang, int(sen_id), tokenbuf)

    with open("data/links.csv") as linkf:
        for line in linkf:
            # Remove the trailing newline before splitting the fields.
            nlang, nid, tlang, tid = line.strip().split('\t')

            # Fields are strings; convert before the bitwise packing,
            # otherwise ``<<`` / ``|`` raise TypeError.
            nid, tid = int(nid), int(tid)

            if nlang < tlang:  # nlang sorts before tlang
                merged = nid | (tid << 32)
                setname = "%s-%s" % (nlang, tlang)
            else:
                merged = tid | (nid << 32)
                setname = "%s-%s" % (tlang, nlang)

            # NOTE(review): assumes the low id is below 2**32 so the two
            # halves do not overlap; verify against the data.
            redisdb.sadd(setname, merged)
示例#3
0
 def add_sentence(self, lang, id, sentence):
     """Tokenize *sentence* and store the result in the hash for *lang*.

     The sentence is UTF-8 encoded, handed to ``pyplus1.lang_parse``
     together with *lang*, and the returned words are joined with ``'.'``
     before being written to ``self.redisdb`` under field *id*.
     """
     encoded = sentence.encode('utf-8')
     tokens = pyplus1.lang_parse(lang, encoded)

     joined = '.'.join(tokens)
     self.redisdb.hset(lang, id, joined)
示例#4
0
 def parse_text(self, text):
     """Feed every word of every line in *text* to ``self.add_word``.

     Each line is UTF-8 encoded and parsed by ``pyplus1.lang_parse``
     using the UTF-8 encoded ``self.tlang`` as the language argument.
     """
     for raw_line in text:
         parsed = pyplus1.lang_parse(
             self.tlang.encode('utf-8'),
             raw_line.encode('utf-8'),
         )
         for word in parsed:
             self.add_word(word)