Exemplo n.º 1
0
    def query(self, qstring):
        """Find index rows that contain every known word of a query string.

        The query is segmented with ``fenci.solve``. Each resulting word is
        looked up in the ``wordlist`` table; for every word found, one
        ``wordlocation`` alias (``w0``, ``w1``, ...) is added to a self-join
        in which all aliases are tied to the same ``urlid``.

        Args:
            qstring: Raw query text handed to ``fenci.solve``.

        Returns:
            ``None`` when segmentation yields no words. Otherwise a tuple
            ``(rows, wordids)``: ``rows`` is a list of result rows shaped
            ``(urlid, w0.location, w1.location, ...)`` and ``wordids`` holds
            the ``wordlist`` rowids of the words that were found, in the
            order they were iterated. If none of the words is present in
            ``wordlist`` the result is ``([], [])``.
        """
        words = fenci.solve(qstring)
        if not words:
            return None

        wordids = []
        fields = ['w0.urlid']
        tables = []
        clauses = []
        for word in words:
            # NOTE(review): byte-string words are assumed to be UTF-8 text;
            # confirm against what fenci.solve returns. Decoding keeps bound
            # parameters acceptable to drivers that reject non-ASCII
            # byte strings.
            if isinstance(word, bytes):
                word = word.decode('utf-8')

            # Bind the word instead of formatting it into the statement, so
            # quotes in user input can neither break nor inject SQL.
            # Assumes a qmark-style driver (the table's ``rowid`` suggests
            # SQLite) -- TODO confirm.
            row = self.con.execute(
                'select rowid from wordlist where word=?', (word,)
            ).fetchone()
            if row is None:
                # Words missing from the index are ignored.
                continue

            wordid = row[0]
            alias = len(wordids)  # index of the wordlocation alias to add
            if alias > 0:
                # Tie this alias to the previous one so that every matched
                # word has to occur in the same url.
                clauses.append('w%d.urlid=w%d.urlid' % (alias - 1, alias))
            fields.append('w%d.location' % alias)
            tables.append('wordlocation w%d' % alias)
            clauses.append('w%d.wordid=%d' % (alias, wordid))
            wordids.append(wordid)

        if not wordids:
            # Nothing to join on: the statement would have an empty FROM and
            # WHERE and fail to parse, so report "no matches" directly.
            return [], wordids

        # full sql
        fullsql = 'select %s from %s where %s' % (
            ','.join(fields), ','.join(tables), ' and '.join(clauses))
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print('SQL:  %s' % fullsql)
        cur = self.con.execute(fullsql)
        rows = list(cur)
        return rows, wordids
Exemplo n.º 2
0
Arquivo: feed.py Projeto: mlnotes/hfml
def getwords():
	"""Count segmented words for every distinct-titled entry of ``feedlist``.

	Each feed in the module-level ``feedlist`` is parsed with ``fp.parse``.
	For every entry whose title has not been seen before, the UTF-8 encoded
	title and description are concatenated and segmented with
	``fenci.solve(..., fenci.mmseg)``, and the resulting words are counted.

	Returns:
		A tuple ``(allwords, articlewords, articletitles)``:
		``allwords`` maps each word to its total count over all articles,
		``articlewords`` is a list with one word -> count dict per article,
		and ``articletitles`` lists the article titles in the same order as
		``articlewords``.
	"""
	allwords = {}
	articlewords = []
	articletitles = []
	# Set twin of articletitles: constant-time duplicate checks instead of
	# scanning the growing list once per entry.
	seen_titles = set()
	for feed in feedlist:
		parsed = fp.parse(feed)
		for e in parsed.entries:
			if e.title in seen_titles:
				continue

			#get words
			words = fenci.solve(e.title.encode('utf8') + e.description.encode('utf8'),
								fenci.mmseg)
			seen_titles.add(e.title)
			articletitles.append(e.title)
			# Per-article counter; kept as a local so no running index into
			# articlewords has to be maintained.
			counts = {}
			articlewords.append(counts)

			for w in words:
				allwords[w] = allwords.get(w, 0) + 1
				counts[w] = counts.get(w, 0) + 1
	return allwords, articlewords, articletitles
Exemplo n.º 3
0
def segment(doc):
    """Return the keys of the mapping that ``fenci.solve`` builds for *doc*.

    Presumably these keys are the distinct words found in the document;
    confirm against ``fenci.solve``.
    """
    return fenci.solve(doc).keys()