Exemplo n.º 1
0
def callSentenceGen_com(reply, k):
    """Generate a sentence in response to *reply* from the serif corpus.

    Reads ``strarf_serif.txt`` as UTF-8, builds a model from its lines with
    ``trigramModelGenerator.generateModel``, persists that model through
    ``cpickler.topickle``, extracts a keyword from *reply* with
    ``keywordext.extraction`` and hands it to
    ``sentenceGenerator.sentenceGenerator(...).generateSentence``.

    Args:
        reply: Input passed unchanged to ``keywordext.extraction``.
        k: Unused by this function; kept so existing callers keep working.

    Returns:
        Whatever ``generateSentence`` returns for the extracted keyword.

    Raises:
        IOError: If ``strarf_serif.txt`` cannot be opened (OSError on
            Python 3).
    """
    srctxt = []
    with codecs.open("strarf_serif.txt", "rb", "utf-8") as f:
        for line in f:
            # Each corpus line is followed by an explicit u"EOS" entry.
            # The line itself is appended untouched, so it keeps its
            # trailing line terminator.
            srctxt.extend((line, u"EOS"))

    freq1 = trigramModelGenerator.generateModel(srctxt)
    # No filename is given, so the destination is cpickler's default.
    cpickler.topickle(freq1)

    keyword1 = keywordext.extraction(reply, freq1)
    sentenceGen = sentenceGenerator.sentenceGenerator(freq1)
    return sentenceGen.generateSentence(keyword1)
Exemplo n.º 2
0
def generate_unigram_model(favs):
    """Update the persisted unigram frequency table from *favs*.

    Loads the existing table from ``favs_model_unigram.dump`` (starting
    from an empty dict when it cannot be loaded), counts every token that
    ``mecabCaller.parse`` yields for each ``fav.text``, writes the table
    back to the same file and returns it.

    Args:
        favs: Iterable of iterables of objects exposing a ``text``
            attribute.

    Returns:
        dict mapping each token (with the characters in ``esc`` removed)
        to its accumulated count.
    """
    try:
        freqfav = cpickler.frompickle(filename='favs_model_unigram.dump')
    except Exception:
        # Best effort: any load failure means "start from an empty model".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        freqfav = {}

    # NOTE(review): u'!' and u'?' appear twice, exactly as in the original;
    # possibly the second pair were full-width forms lost in transcoding.
    # Confirm against the upstream file.
    esc = frozenset(
        [u'…', u'・', u'.', u',', u'、', u'。', u'!', u'?', u'!', u'?']
    )
    for favlist in favs:
        for fav in favlist:
            for itemraw in mecabCaller.parse(fav.text):
                # The original built a *list* of the characters that ARE in
                # ``esc`` and used it as a dict key, which raises
                # "TypeError: unhashable type: 'list'". Strip those
                # characters instead and key on the resulting string.
                # Assumes each parsed item is a text string -- TODO confirm
                # against mecabCaller.parse.
                item = u''.join(x for x in itemraw if x not in esc)
                if not item:
                    # Nothing left once the esc characters are removed.
                    continue
                freqfav[item] = freqfav.get(item, 0) + 1
    cpickler.topickle(freqfav, filename='favs_model_unigram.dump')

    return freqfav