-
Notifications
You must be signed in to change notification settings - Fork 0
/
neiro.py
97 lines (72 loc) · 2.81 KB
/
neiro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
import os
import pickle
import re
import sys

import numpy as np
import telebot
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from Stemmer import Stemmer
# NOTE(security): a bot token committed to source control should be treated as
# compromised and revoked. Supply the real token through the
# TELEGRAM_API_TOKEN environment variable; the literal below is kept only as a
# fallback so that existing deployments keep working unchanged.
API_TOKEN = (
    os.environ.get('TELEGRAM_API_TOKEN')
    or '503561866:AAGgyO922mNjLAC2j_wcqnltSuvo6K1rtpg'
)
bot = telebot.TeleBot(API_TOKEN)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Text cleaning: lowercase the text, stem every word, and replace standalone digit runs via a regexp
def text_cleaner(text):
    """Normalize ``text`` for vectorization.

    The text is lowercased, split on whitespace, each word is stemmed with a
    Russian stemmer, the stems are re-joined with single spaces, and every
    standalone run of digits is replaced by the placeholder `` digit ``.

    Args:
        text: raw input string.

    Returns:
        The normalized string.
    """
    russian_stemmer = Stemmer('russian')
    lowered_words = text.lower().split()
    stemmed = ' '.join(russian_stemmer.stemWords(lowered_words))
    # Collapse every whole-number token into one shared placeholder.
    return re.sub(r'\b\d+\b', ' digit ', stemmed)
# - - - - - - - - - - - - - - - - - - - - - - - - -
# Load the training data from the file 1.txt
#
def load_data(path='1.txt'):
    """Load text/tag training pairs from an ``@``-separated text file.

    Each useful line has the form ``<text>@<tag>``; only the first two
    ``@``-separated fields are used. Lines containing ``#`` anywhere are
    skipped, as are lines without an ``@`` separator (for example blank
    lines), which previously raised ``IndexError``.

    Args:
        path: file to read (UTF-8). Defaults to ``1.txt`` in the current
            working directory, matching the original behavior.

    Returns:
        A dict with two parallel lists: ``'text'`` and ``'tag'``.
    """
    data = {'text': [], 'tag': []}
    # The context manager closes the file even if reading fails part-way.
    with open(path, encoding='utf8') as source:
        for line in source:
            if '#' in line:
                continue
            # Strip the line terminator so a tag is the same label whether or
            # not it sits on the file's final, unterminated line.
            row = line.rstrip('\r\n').split('@')
            if len(row) < 2:
                continue
            data['text'].append(row[0])
            data['tag'].append(row[1])
    return data
# - - - - - - - - - - - - - - - - - - - - - - - - -
# Model training helpers: shuffle the loaded data and split it into training and test parts
def train_test_split(data, validation_split=0.1):
    """Shuffle the samples and split them into training and test parts.

    Args:
        data: dict with parallel sequences under ``'text'`` and ``'tag'``.
        validation_split: fraction of the samples (rounded down) to hold out
            as the test part.

    Returns:
        ``{'train': {'x': [...], 'y': [...]}, 'test': {'x': [...], 'y': [...]}}``
        where ``x`` holds texts and ``y`` the matching tags, in the same
        shuffled order.
    """
    sz = len(data['text'])
    order = np.random.permutation(sz)
    X = [data['text'][i] for i in order]
    Y = [data['tag'][i] for i in order]
    # Clamp so that out-of-range fractions cannot produce a negative boundary.
    nb_validation_samples = min(max(int(validation_split * sz), 0), sz)
    # Slice at an explicit boundary. With negative-offset slicing a hold-out
    # size of 0 becomes X[:-0] == [] and X[-0:] == X, which silently put every
    # sample into the test part and left the training part empty.
    boundary = sz - nb_validation_samples
    return {
        'train': {'x': X[:boundary], 'y': Y[:boundary]},
        'test': {'x': X[boundary:], 'y': Y[boundary:]},
    }
# - - - - - - - - - - - - - - - - - - - -
def openai(z, message):
    """Classify the question ``z`` and reply to ``message`` with the result.

    A TF-IDF + linear SGD (hinge loss) pipeline is fitted on the training
    part of the data returned by ``load_data()``. The tag predicted for ``z``
    is printed and sent back through ``bot.reply_to``.

    Args:
        z: question text to classify.
        message: incoming message object the reply is attached to.
    """
    # NOTE(review): the data file is re-read and the model re-fitted on every
    # call. Caching the fitted pipeline would be faster, but it would stop
    # picking up edits to the data file, so that is left for a deliberate
    # follow-up.
    D = train_test_split(load_data())
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', SGDClassifier(loss='hinge')),
    ])
    text_clf.fit(D['train']['x'], D['train']['y'])
    # predict() expects a collection of documents, hence the one-item list.
    answer = text_clf.predict([z])[0]
    print(answer)
    bot.reply_to(message, answer)
# Handle '/start' and '/help'
@bot.message_handler(commands=['help', 'start'])
def send_welcome(message):
    """Reply with a greeting to the /help and /start commands."""
    greeting = "Привет"
    bot.reply_to(message, greeting)
@bot.message_handler(func=lambda message: True)
def send_message(message):
    """Pass the text of a message accepted by the catch-all filter to ``openai``.

    ``openai`` classifies the text and sends the reply itself.
    """
    openai(message.text, message)
# Start polling for incoming updates so the handlers registered above run.
# NOTE(review): presumably this call blocks for the lifetime of the process - confirm.
bot.polling()