Exemplo n.º 1
0
# Train a sentiment classifier on the labelled documents stored in test.db,
# then save the trained model (test.svm) and the feature index (test.index).
import sqlite3  # required by sqlite3.connect / sqlite3.Row below

from yatk import ir
from yatk.ml.svm import SVM as Classifier
# Alternative classifier:
# from yatk.ml.nb import NaiveBayes as Classifier

con = sqlite3.connect('test.db')
try:
    # sqlite3.Row lets the columns be read by name instead of position.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute('select class, text from docs')
    # Each document is kept as a (class, text) tuple.
    docs = [(row['class'], row['text']) for row in cur.fetchall()]
finally:
    # All rows are already copied into `docs`, so the handle can be released
    # here; doing it in `finally` also covers a failing query.
    con.close()

# NOTE(review): 'bogram' may be a typo for 'bigram' -- confirm against the
# values accepted by ir.SentimentIndex before changing it.
index = ir.SentimentIndex('delta', 'bogram')
# Accessors matching the (class, text) layout of the tuples built above.
index.get_class = lambda x: x[0]
index.get_text = lambda x: x[1]
index.build(docs)

# x: one weighted feature set per document; y: the matching class labels.
x = [index.weight(index.features(doc)) for doc in docs]
y = [doc[0] for doc in docs]

cl = Classifier()
cl.train(x, y)
cl.save('test.svm')

index.save('test.index')
Exemplo n.º 2
0
class Guesser:
    """Per-token classifier driven by affix and neighbouring-word features.

    Wraps a ``Classifier`` instance. ``is_candidate`` and ``make_class``
    decide which tokens are used and which label each one gets; as written
    here every token is accepted and every label is ``None`` (presumably
    these two methods are meant to be overridden by subclasses).

    A sentence is a sequence of tokens, and each token is an indexable
    object whose element 0 is the word string.
    """

    def __init__(self):
        self._cl = Classifier()

    def is_candidate(self, word):
        """Return whether the token ``word`` should be used; always True."""
        return True

    def make_class(self, word):
        """Return the label for the token ``word``; ``None`` here."""
        return None

    def traverse(self, sentences):
        """Collect features and labels for every candidate token.

        Returns a tuple ``(x, y)``: ``x`` is a list of feature dicts from
        ``gen_features`` and ``y`` the list of labels from ``make_class``,
        in sentence order.
        """
        x = []
        y = []
        for sentence in sentences:
            for w, word in enumerate(sentence):
                if not self.is_candidate(word):
                    continue

                x.append(self.gen_features(sentence, w))
                y.append(self.make_class(word))

        return (x, y)

    def train(self, sentences):
        """Train the wrapped classifier on the tokens of ``sentences``."""
        (train_x, train_y) = self.traverse(sentences)
        self._cl.train(train_x, train_y)

    def predict(self, sentences):
        """Return ``(predicted_labels, labels_from_make_class)``."""
        (test_x, test_y) = self.traverse(sentences)
        return (self._cl.predict(test_x), test_y)

    def test(self, sentences):
        """Return the classifier's evaluation of its predictions."""
        (estim_y, test_y) = self.predict(sentences)
        return self._cl.evaluate(test_y, estim_y)

    def guess(self, word):
        """Predict the label of a single word string without context."""
        # The word is wrapped as a one-token sentence, so only its own
        # prefix/suffix/word features are generated.
        return self._cl.predict([self.gen_features([(word, )], 0)])[0]

    def gen_features(self, sentence, w):
        """Build the feature dict for the token at index ``w``.

        Features (every value is 1):

        * ``p3``..``p6``: prefixes of the word, ``s2``..``s5``: suffixes,
          ``w``: the word itself;
        * for each of the up to three preceding tokens, keys prefixed with
          the distance (``1s2:``, ``2w:`` ...): suffixes 2-4 and the word;
        * for the following token, keys ``1+s2:`` .. ``1+w:``.

        The token at ``sentence[-1]`` counts as right-hand context for the
        token before it. Models trained with feature sets that lacked this
        context should be retrained.
        """
        word = sentence[w][0]
        x = {}

        for n in (3, 4, 5, 6):
            x['p%d:' % n + word[:n]] = 1
        for n in (2, 3, 4, 5):
            x['s%d:' % n + word[-n:]] = 1
        x['w:' + word] = 1

        # Left context: only suffixes and the full word are emitted.
        for i in range(1, 4):
            if w >= i:
                prev = sentence[w - i][0]
                for n in (2, 3, 4):
                    x['%d' % i + 's%d:' % n + prev[-n:]] = 1
                x[str(i) + 'w:' + prev] = 1

        # Right context: `< len(sentence)` (not `len(sentence) - 1`) so the
        # final token of the sentence is a valid neighbour too.
        for i in range(1, 2):
            if w + i < len(sentence):
                following = sentence[w + i][0]
                for n in (2, 3, 4):
                    x['%d' % i + '+s%d:' % n + following[-n:]] = 1
                x[str(i) + '+w:' + following] = 1

        return x

    def save(self, path):
        """Save the wrapped classifier to ``path``."""
        self._cl.save(path)

    @staticmethod
    def load(path):
        """Return a ``Guesser`` whose classifier is loaded from ``path``.

        NOTE(review): this always builds a base ``Guesser``, even when it
        is called through a subclass.
        """
        obj = Guesser()
        obj._cl = Classifier.load(path)
        return obj
Exemplo n.º 3
0
# Train a sentiment classifier on the labelled documents stored in test.db,
# then save the trained model (test.svm) and the feature index (test.index).
import sqlite3  # required by sqlite3.connect / sqlite3.Row below

from yatk import ir
from yatk.ml.svm import SVM as Classifier
# Alternative classifier:
# from yatk.ml.nb import NaiveBayes as Classifier

con = sqlite3.connect('test.db')
try:
    # sqlite3.Row lets the columns be read by name instead of position.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute('select class, text from docs')
    # Each document is kept as a (class, text) tuple.
    docs = [(row['class'], row['text']) for row in cur.fetchall()]
finally:
    # All rows are already copied into `docs`, so the handle can be released
    # here; doing it in `finally` also covers a failing query.
    con.close()

# NOTE(review): 'bogram' may be a typo for 'bigram' -- confirm against the
# values accepted by ir.SentimentIndex before changing it.
index = ir.SentimentIndex('delta', 'bogram')
# Accessors matching the (class, text) layout of the tuples built above.
index.get_class = lambda x: x[0]
index.get_text = lambda x: x[1]
index.build(docs)

# x: one weighted feature set per document; y: the matching class labels.
x = [index.weight(index.features(doc)) for doc in docs]
y = [doc[0] for doc in docs]

cl = Classifier()
cl.train(x, y)
cl.save('test.svm')

index.save('test.index')
Exemplo n.º 4
0
class Guesser:
    """Per-token classifier driven by affix and neighbouring-word features.

    Wraps a ``Classifier`` instance. ``is_candidate`` and ``make_class``
    decide which tokens are used and which label each one gets; as written
    here every token is accepted and every label is ``None`` (presumably
    these two methods are meant to be overridden by subclasses).

    A sentence is a sequence of tokens, and each token is an indexable
    object whose element 0 is the word string.
    """

    def __init__(self):
        self._cl = Classifier()

    def is_candidate(self, word):
        """Return whether the token ``word`` should be used; always True."""
        return True

    def make_class(self, word):
        """Return the label for the token ``word``; ``None`` here."""
        return None

    def traverse(self, sentences):
        """Collect features and labels for every candidate token.

        Returns a tuple ``(x, y)``: ``x`` is a list of feature dicts from
        ``gen_features`` and ``y`` the list of labels from ``make_class``,
        in sentence order.
        """
        x = []
        y = []
        for sentence in sentences:
            for w, word in enumerate(sentence):
                if not self.is_candidate(word):
                    continue

                x.append(self.gen_features(sentence, w))
                y.append(self.make_class(word))

        return (x, y)

    def train(self, sentences):
        """Train the wrapped classifier on the tokens of ``sentences``."""
        (train_x, train_y) = self.traverse(sentences)
        self._cl.train(train_x, train_y)

    def predict(self, sentences):
        """Return ``(predicted_labels, labels_from_make_class)``."""
        (test_x, test_y) = self.traverse(sentences)
        return (self._cl.predict(test_x), test_y)

    def test(self, sentences):
        """Return the classifier's evaluation of its predictions."""
        (estim_y, test_y) = self.predict(sentences)
        return self._cl.evaluate(test_y, estim_y)

    def guess(self, word):
        """Predict the label of a single word string without context."""
        # The word is wrapped as a one-token sentence, so only its own
        # prefix/suffix/word features are generated.
        return self._cl.predict([self.gen_features([(word,)], 0)])[0]

    def gen_features(self, sentence, w):
        """Build the feature dict for the token at index ``w``.

        Features (every value is 1):

        * ``p3``..``p6``: prefixes of the word, ``s2``..``s5``: suffixes,
          ``w``: the word itself;
        * for each of the up to three preceding tokens, keys prefixed with
          the distance (``1s2:``, ``2w:`` ...): suffixes 2-4 and the word;
        * for the following token, keys ``1+s2:`` .. ``1+w:``.

        The token at ``sentence[-1]`` counts as right-hand context for the
        token before it. Models trained with feature sets that lacked this
        context should be retrained.
        """
        word = sentence[w][0]
        x = {}

        for n in (3, 4, 5, 6):
            x['p%d:' % n + word[:n]] = 1
        for n in (2, 3, 4, 5):
            x['s%d:' % n + word[-n:]] = 1
        x['w:' + word] = 1

        # Left context: only suffixes and the full word are emitted.
        for i in range(1, 4):
            if w >= i:
                prev = sentence[w - i][0]
                for n in (2, 3, 4):
                    x['%d' % i + 's%d:' % n + prev[-n:]] = 1
                x[str(i) + 'w:' + prev] = 1

        # Right context: `< len(sentence)` (not `len(sentence) - 1`) so the
        # final token of the sentence is a valid neighbour too.
        for i in range(1, 2):
            if w + i < len(sentence):
                following = sentence[w + i][0]
                for n in (2, 3, 4):
                    x['%d' % i + '+s%d:' % n + following[-n:]] = 1
                x[str(i) + '+w:' + following] = 1

        return x

    def save(self, path):
        """Save the wrapped classifier to ``path``."""
        self._cl.save(path)

    @staticmethod
    def load(path):
        """Return a ``Guesser`` whose classifier is loaded from ``path``.

        NOTE(review): this always builds a base ``Guesser``, even when it
        is called through a subclass.
        """
        obj = Guesser()
        obj._cl = Classifier.load(path)
        return obj