def updateSource(self):
    """Rewrite the source file with the current dialogue lines.

    Every element of ``self.diallines`` is written to ``self.source`` as
    ``str(element)`` followed by a newline; the previous file contents are
    replaced.
    """
    logD("Запись изменений в \"" + self.source + "\"...")
    # The context manager closes the file even when str() or write() raises.
    with open(self.source, "w") as fl:
        fl.writelines(str(i) + "\n" for i in self.diallines)
    logD("\"" + self.source + "\" обновлён")
def fixTxtIfNeed(txt):
    """Return ``txt``, passed through the spell checker only when needed.

    The text is split with ``utils.str2arr``. As soon as one word is found
    whose start form (``utils.getStartForm``) is missing from
    ``utils.dictionw``, the whole text is sent to ``utils.checkText`` and
    its result is returned. Otherwise ``txt`` is returned unchanged.
    """
    has_unknown_word = False
    for word in utils.str2arr(txt):
        if utils.getStartForm(word) not in utils.dictionw:
            has_unknown_word = True
            break
    if not has_unknown_word:
        return txt
    logD("Ask Ya.Speller...")
    return utils.checkText(txt)
def rutoen(q):
    """Translate ``q`` with the Yandex Translate API (``lang=ru-en``).

    Returns the first element of the ``text`` field of the JSON response,
    or an empty string when the request or the lookup fails (the error is
    logged).
    """
    prefix = ("https://translate.yandex.net/api/v1.5/tr.json/translate?key="
              + APIkeys.YandexTranslate + "&text")
    # urlencode([("", q)]) produces "=<quoted q>", completing "&text".
    query = urllib.parse.urlencode([("", q)])
    url = prefix + query + "&lang=ru-en&format=plain"
    try:
        response = getJSON(url)
        return response['text'][0]
    except Exception as err:
        logD("Yatranslate error: " + str(err))
    return ""
def getAnswerByDial(self, arr):
    """Answer the last phrase of a dialogue using keyword-triggered handlers.

    ``arr`` is the list of dialogue phrases; only the last one is matched
    against the keyword rules. The rules are tried in a fixed order and the
    first one that fires decides the answer. When none fires, the request
    is delegated to ``getAnswerFromKnowledger``.

    Returns a ``(confidence, answer)`` pair; ``(0, "")`` when the fallback
    fails (for example when ``arr`` has no previous phrase).
    """
    # "Nothing is more permanent than the temporary..."

    def mentions(haystack, *needles):
        # True when at least one needle is contained in haystack.
        return any(needle in haystack for needle in needles)

    txt = arr[-1].lower()
    # Strip the names the bot can be addressed by.
    for name in ("курису", "макисе", "макисэ", "амадей", "amadeus",
                 "амадеус"):
        txt = txt.replace(name, "")
    txtarr = utils.str2arr(txt)

    # "Which is better" questions.
    if "что лучше" in txt or ("лучше" in txtarr and "или" in txtarr):
        t1, t2 = getVibor(txt)
        if t1 > 0.2:
            return t1, t2

    # Quotes: bash.org first, then a random quote.
    send_word = mentions(txtarr, "го", "кинь", "пришли", "отправь")
    quote_request = send_word and "цитату" in txtarr
    if quote_request and mentions(txt, "баш", "bash"):
        return 0.9, getBashorg()
    if quote_request:
        return 0.9, getRandquote()
    if txt == "цитату":
        return 0.7, getRandquote()

    # Anime recommendations.
    if (mentions(txt, "посоветуй", "порекомендуй", "подскажи", "кинь")
            and mentions(txtarr, "аниме", "анимцо", "анимэ", "anime")):
        return 0.9, getRandAnime()
    if ("какое" in txtarr and "аниме" in txtarr
            and mentions(txtarr, "посмотреть", "глянуть")):
        return 0.9, getRandAnime()

    # Cat-girl pictures.
    if ((mentions(txt, "кинь", "отправь", "пришли") or "го" in txtarr)
            and mentions(txt, "кошкодев", "некочан", "некотян",
                         "с кошачьеми уш", "с кошачьими уш")):
        return 0.9, getNekochan()

    # arXiv lookups; a failing lookup is logged and the next rules are tried.
    try:
        if "arxiv.org" in txt:
            return 0.8, getArxivArticle(txt)
        article_request = " стать" in txt and mentions(
            txt, "arxiv", " архива", " архиве")
        search_request = "най" in txt and mentions(txt, "arxiv", "архиве")
        if article_request or search_request:
            return 0.85, getArxivArticle(txt)
    except Exception as e:
        logD(e)

    # Selfie requests.
    if ((send_word or "покажи" in txtarr)
            and (mentions(txtarr, "себя", "селфи", "сэлфи", "себяшку")
                 or mentions(txt, "свою фот", "с собой"))):
        return 0.7, getKurisuArt()

    # A getMedicalArticle rule is not wired in yet.
    try:
        return getAnswerFromKnowledger(
            txt.replace("что ты думаешь о", "что такое"),
            utils.getMainTheme(" ".join(arr)), arr[-2])
    except Exception as e:
        logD(e)
    return 0, ""
def weakWiki(url):  # HTML Wiki
    """Download a wiki page and return the leading part of its text.

    The page is fetched with ``wget`` into ``/tmp/wikitest.html`` and
    converted with ``html2text``. The text is cut to start 5 characters
    after the first "поиск"/"search" marker and to end before the first
    "*****"; newlines are replaced with spaces.

    Raises ValueError when the "*****" delimiter is missing.
    """
    if debug:
        logD("Обращение к html Википедии")
    bashExec("wget -O /tmp/wikitest.html -U \"" + ua + "\" \"" + url +
             "\" 2>/dev/null")
    # The appended sentinels guarantee that both index() calls succeed.
    tmp = bashExec("html2text -utf8 /tmp/wikitest.html").decode(
        "utf-8") + "поискsearch"
    # Fixed: min() needs both positions as separate arguments; the original
    # added them together and called min() on an int, which always raised
    # TypeError.
    tmp = tmp[min(tmp.index("поиск"), tmp.index("search")) + 5:]
    tmp = tmp[:tmp.index("*****")]
    tmp = tmp.replace("\n", " ")
    # strongWikiEN uses the return value as the answer text.
    return tmp
def SEMlearnByFile(fname):
    """Feed every line of the text file ``fname`` to ``SEMlearnByTXT``.

    A percentage progress indicator is written to stdout every 50 lines,
    and "Complete!" is logged at the end.
    """
    import sys
    # The context manager closes the file even when reading fails.
    with open(fname, "r") as fl:
        arr = fl.readlines()
    total = len(arr)
    for i, line in enumerate(arr):
        if i % 50 == 0:
            sys.stdout.write(str(int(10000 * i / total) / 100) + "% \r")
        SEMlearnByTXT(line)
    logD("Complete!")
def makeDictionaryByFile(fname):  # Build the dictionary from the words of a file
    """Call ``wordInfo`` for every word of every line of ``fname``.

    Lines are split into words with ``str2arr``. The result of ``wordInfo``
    is discarded, so the call is presumably made for its side effects
    (TODO confirm). When it raises, the error is logged and the function
    sleeps for 20 seconds before moving on to the next word.
    """
    # The context manager closes the file even when reading fails.
    with open(fname, "r") as fl:
        lns = fl.readlines()
    for s in lns:
        for w in str2arr(s):
            try:
                wordInfo(w)
            except Exception as e:
                logD(w + ": " + str(e))
                time.sleep(20)
def getAnswerByDial(
        self, arr
):  # arr is the list of phrases in the dialogue; the method tries to continue it
    """Find a stored line that continues the dialogue ``arr``.

    Every window of ``len(arr)`` consecutive entries of ``self.diallines``
    is compared with the dialogue phrase by phrase using ``compareWithMe``
    (``faster=True``), and the per-phrase results are combined with
    ``weightFunction``. One of the windows whose score exceeds 80% of the
    best score is picked at random, and the stored line that follows it is
    the answer.

    Returns ``(score, text)``: the window score divided by the sum of the
    weights, and the ``orig`` text of the following line. ``(0, "")`` is
    returned when no window qualifies, and ``(score, "")`` when the picked
    window is the last one in the storage.
    """
    dialarr = []
    for i in arr:
        dialarr.append(dialline(i))
    maxval = 0  # best window score seen so far
    maxval_2 = 0  # pruning threshold derived from maxval
    maxvalarr = []  # scores of the candidate windows
    maxposarr = []  # start positions of the candidate windows
    dial_l = len(dialarr)
    # Sum of all weights; used to normalise the final score.
    weights = weightFunction(dial_l - 1)
    for i in range(dial_l - 2, -1, -1):
        weights += weightFunction(i)
    # logD(str(weights))
    for pos in range(len(self.diallines) - dial_l + 1):
        summ = weightFunction(dial_l - 1) * self.diallines[
            pos + dial_l - 1].compareWithMe(dialarr[dial_l - 1], faster=True)
        if (summ < maxval_2):
            continue  # Further checking is pointless: it will not catch up... probably
        for i in range(dial_l - 2, -1, -1):  # The search starts from the last phrase
            summ += weightFunction(i) * self.diallines[
                pos + i].compareWithMe(dialarr[i], faster=True)
        try:  # Patch against nonsense answers
            # Penalise candidate answers longer than 7 words. For the last
            # window there is no following line, so the lookup raises and
            # the error is only logged.
            weakeffect = len(self.diallines[pos + dial_l].words)
            if (weakeffect > 7):
                summ -= weightFunction(-2) * (weakeffect - 7) / 3
        except Exception as e:
            logD("getAnswerError: " + str(e))
        if (summ >= maxval):
            maxvalarr.append(summ)
            maxposarr.append(pos)
            maxval = summ
            maxval_2 = maxval / (dial_l + 1)
    newmaxvalarr = []
    newmaxposarr = []
    tmpmaxval = 0.8 * maxval  # The slowest part is over, we can relax
    for k in range(len(maxvalarr)):
        if (maxvalarr[k] > tmpmaxval):
            newmaxvalarr.append(maxvalarr[k])
            newmaxposarr.append(
                maxposarr[k]
            )  # Ideally there would be one more check with faster=False, but never mind
    if (len(newmaxposarr) == 0): return 0, ""
    tmpp = random.randrange(len(newmaxposarr))
    score = newmaxvalarr[tmpp] / weights
    ind = newmaxposarr[tmpp] + dial_l  # index of the line following the window
    if (ind >= len(self.diallines)): return score, ""
    return score, self.diallines[ind].orig
def strongWikiEN(q):  # Query the English Wikipedia
    """Return a short description of ``q`` from the English Wikipedia.

    Queries with more than three spaces are rejected with "". Otherwise the
    opensearch API is asked. Its first description is returned unless it is
    shorter than 25 characters or contains no "— "; in that case the text
    of the first result page is fetched with ``weakWiki``.

    Returns "" when the response lacks the expected fields or the page
    lookup fails.
    """
    if debug:
        logD("Обращение к англоязычной Википедии")
    if q.count(" ") > 3:
        return ""
    # urlencode([("", q)]) produces "=<quoted q>", completing "&search".
    url = ("https://en.wikipedia.org/w/api.php?action=opensearch&format=json&formatversion=2&search"
           + urllib.parse.urlencode([("", q)])
           + "&namespace=0&limit=10&suggest=true")
    ans = getJSON(url)
    try:
        tmp = ans[2][0]
        if len(tmp) < 25 or tmp.count("— ") < 1:
            tmp = weakWiki(ans[3][0])
        return tmp
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit.
        return ""
def getJSON(url, ttl=5):
    """Fetch ``url`` and return its body parsed as JSON.

    The body is decoded as UTF-8. On any error the message is logged and
    the request is repeated after a 0.4 second pause, up to ``ttl`` more
    times. When every attempt fails an empty dict is returned.
    """
    while True:
        try:
            # The context manager closes the response even when read(),
            # decode() or json.loads() raises.
            with urllib.request.urlopen(url) as bts:
                return json.loads(bts.read().decode('UTF-8'))
        except Exception as e:
            logD("JSON error: " + str(e))
            if ttl <= 0:
                return {}
            time.sleep(0.4)
            ttl -= 1
def getHTML(url, ttl=5):
    """Fetch ``url`` and return its body decoded as UTF-8.

    On any error the message is logged and the request is repeated after a
    0.4 second pause, up to ``ttl`` more times. When every attempt fails an
    empty string is returned.
    """
    while True:
        try:
            # The context manager closes the response even when read() or
            # decode() raises.
            with urllib.request.urlopen(url) as bts:
                return bts.read().decode('UTF-8')
        except Exception as e:
            logD("HTML error: " + str(e))
            if ttl <= 0:
                return ""
            time.sleep(0.4)
            ttl -= 1
def SEMsaveData(fname):
    """Save the semantic tables next to ``fname``.

    ``semwithoutends`` is written to ``fname + ".semwithoutends"`` and
    ``semwordcount`` (each item converted with ``str``) to
    ``fname + ".semwordcount"``, one item per line with no trailing
    newline. Empty tables produce empty files instead of raising
    IndexError.
    """
    # Context managers close the files even when a write fails.
    with open(fname + ".semwithoutends", "w") as fl:
        fl.write("\n".join(semwithoutends))
    with open(fname + ".semwordcount", "w") as fl:
        fl.write("\n".join(str(i) for i in semwordcount))
    logD("Saved!")
def onSpeechRec(s):
    """Handle a recognised phrase: log it, echo it and ask the dialog.

    The phrase is appended to the global ``logsarr`` (trimmed to its last
    25 entries), printed to stdout with a coloured "You" prefix, has the
    Latin spellings of the bot names replaced, and is passed to
    ``d.getAnswer``. Any exception is logged and swallowed.
    """
    global logsarr
    logD("Text: " + s)
    try:
        logsarr.append("You: " + s)
        if len(logsarr) > 25:
            logsarr = logsarr[-25:]
        sys.stdout.write('\r\033[1;33mYou\033[0m: ' + s + '\n')
        for latin, cyrillic in (("amadeus", "амадеус"), ("kurisu", "Курису")):
            s = s.replace(latin, cyrillic)
        name_hits = sum(
            s.count(fragment)
            for fragment in ("урису", "акис", "мадей", "мадеус", "ристина"))
        # NOTE(review): isPriv is computed but never used; the call below
        # always passes isPrivate=True.
        isPriv = name_hits + s.lower().count("ассистент") > 0
        d.getAnswer(s, isPrivate=True)
    except Exception as e:
        logD(e)
def __init__(self, fname):  # fname is an already preprocessed file
    """Load the dialogue lines stored in ``fname``.

    Each line of the file is converted with ``line2dict`` and wrapped in a
    ``dialline``. Lines that fail to convert are logged (the error, then
    the line) and skipped. Errors while opening or reading the file
    propagate to the caller.
    """
    self.source = fname
    self.diallines = []  # fresh list for every instance
    # The context manager closes the file even when reading fails.
    with open(fname, 'r') as fl:
        lns = fl.readlines()
    for i in lns:
        try:
            self.diallines.append(dialline(line2dict(i)))
        except Exception as e:
            logD(e)
            logD(i)
def otvetMailRu(q):  # Query mail.ru answers
    """Return the first mail.ru answer for ``q``.

    The result is the ``banswer`` and ``answer`` fields of the first search
    result joined by a blank line, with ``<b>`` tags removed. Answers with
    too many exclamation marks or unbalanced closing brackets (heuristic
    score above 4) are rejected with "".

    Returns "" when the request fails or the response lacks the expected
    fields.
    """
    if debug:
        logD("Обращение к ответам mail.ru")
    # urlencode([("", q)]) produces "=<quoted q>", completing "&q".
    url = ("https://go.mail.ru/answer_json?ajax_id=21&q"
           + urllib.parse.urlencode([("", q)])
           + "&num=2&sf=0&dwh_pagetype=search&dwh_platform=web")
    try:
        ans = utils.getJSON(url)
        first = ans['results'][0]
        res = (first['banswer'] + "\n\n" + first['answer']).replace(
            "<b>", "").replace("</b>", "")
        noise = (res.count("!") + res.count(")") - res.count("(") +
                 res.count("))") + res.count("!!") + res.count("!!!"))
        if noise > 4:
            return ""
        return res
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit.
        return ""
def SEMloadData(fname):
    """Load the semantic tables saved next to ``fname``.

    ``semwithoutends`` is read from ``fname + ".semwithoutends"`` (newlines
    stripped) and ``semwordcount`` from ``fname + ".semwordcount"`` (each
    line parsed as float). ``semmaxcount`` and ``semmeancount`` are then
    recomputed as the maximum and the mean of ``semwordcount``.

    Raises ValueError when a count line is not a number or the counts file
    is empty.
    """
    global semwithoutends, semwordcount, semmaxcount, semmeancount
    # Context managers close the files even when parsing fails, and each
    # global is assigned only once its list is complete.
    with open(fname + ".semwithoutends", "r") as fl:
        semwithoutends = [line.replace("\n", "") for line in fl]
    with open(fname + ".semwordcount", "r") as fl:
        semwordcount = [float(line) for line in fl]
    semmaxcount = max(semwordcount)
    semmeancount = sum(semwordcount) / len(semwordcount)
    logD("Loaded!")
def getStartForm(w):  # Find the initial form of a word
    """Return the initial (dictionary) form of the word ``w``.

    The first ``morph.parse`` result is used: words tagged 'PRTF' are
    inflected to singular nominative, others use ``normal_form``. When the
    analyser fails, the error is logged and a fallback is used: the ending
    found by ``deleteEnd`` is mapped to the infinitive suffix "ить" or
    "ать"; ``w`` is returned unchanged when no rule applies.
    """
    try:
        parsed = morph.parse(w)[0]  # https://habr.com/post/176575/ -- thanks ))
        if 'PRTF' not in parsed.tag:
            return parsed.normal_form
        return parsed.inflect({'sing', 'nomn'}).word  # 'PRTF' tagged word
    except Exception as e:
        logD("Morph error: " + str(e))

    # Fallback: guess the infinitive from the ending.
    stem = deleteEnd(w)
    if stem == w:
        return w
    ending = w[len(stem):]
    suffix_rules = (
        ("ить", ("ишь", "ите", "ьте", "ить", "ат", "ят", "ит")),
        ("ать", ("ешь", "ете", "ать", "ут", "ют", "ет")),
    )
    for suffix, endings in suffix_rules:
        if ending in endings:
            return stem + suffix
    return w
def patchDictionary():
    """Clean up the word forms of every dictionary entry and dump the result.

    For each entry of ``dictioni`` (one per element of ``dictionw``):

    * forms shorter than ``max(3, len(root))`` are removed;
    * entries whose ``parts`` contain "|гл|" keep only their first 13 forms;
    * entries left with fewer than two forms are refilled from the lexeme
      of the first ``morph.parse`` result, up to 13 forms;
    * the entry is appended to ``utilsdata/dictionary.patched`` through the
      shell; a failure is logged.
    """
    for i in range(len(dictionw)):
        d = dictioni[i]
        formlen = max(3, len(d['root']))
        # Fixed: the original popped items while walking the list by index,
        # which skipped the form right after every removed one. The list is
        # still modified in place.
        d['forms'][:] = [form for form in d['forms'] if len(form) >= formlen]
        if "|гл|" in d['parts'] and len(d['forms']) > 13:
            d['forms'] = d['forms'][:13]  # drop the participles
        if len(d['forms']) < 2:
            p = morph.parse(d['word'])
            for j in p[0].lexeme:
                if j.word in d["forms"]:
                    continue
                d["forms"].append(j.word)
                if len(d["forms"]) == 13:
                    break
        try:
            bashExec("echo \"" + d['word'] + "\t" + str(d) +
                     "\" >> utilsdata/dictionary.patched")
        except Exception as e:
            logD("Err: Cannot add word to dictionary, " + str(e))
def __init__(self,
             context=[],
             identificator=0,
             sendfunction=sendfstub,
             typefunction=typefstub):
    """Create a dialog bound to one interlocutor.

    context       -- conversation keywords; a copy is stored.
    identificator -- unique identifier of the interlocutor.
    sendfunction  -- send callback; receives the text, then the identifier.
    typefunction  -- "typing" callback; receives the identifier only.

    A per-interlocutor answer base is also opened from
    "data/localdials/dial<identificator>.preprocessed".
    """
    logD("Вызван конструктор диалога")
    # Callbacks and the identity of the interlocutor.
    self.__ident = identificator
    self.__sendf = sendfunction
    self.__typef = typefunction
    # Conversation state.
    self.__context = context.copy()
    self.__lastsent = ""  # last message that was sent
    self.__debuginfo = ""
    # Last few phrases; keeping more than 5 is pointless.
    self.dialsit = ['\n', '\n']
    # str() leaves a plain string as it is and converts anything else.
    ident_text = str(self.__ident)
    self.localdial = pa.getAnswerByFileAutolearn(
        "data/localdials/dial" + ident_text + ".preprocessed")
def getMedicalArticle(q):
    """Return a PubMed link for the query ``q`` with a translated title.

    Queries that ``utils.isEnglish`` scores below 0.8 are first translated
    with ``utils.rutoen``. The E-utilities ``esearch`` endpoint is queried,
    the "Id" field of its response is turned into a PubMed page URL, and
    the page's "title" field (translated with ``utils.entoru``) is appended
    after " -- ".

    Network and parsing errors propagate to the caller.
    """
    # To be implemented in the class below; API documentation:
    # https://www.ncbi.nlm.nih.gov/books/NBK25498/#chapter3.Introduction
    if utils.isEnglish(q) < 0.8:
        q = utils.rutoen(q)
    # urlencode([("", q)]) produces "=<quoted q>", completing "&term".
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term"
           + urllib.parse.urlencode([("", q)]) + "&usehistory=y")
    logD("Original query: " + q)
    # Context managers close the responses even when read()/decode() raises.
    with urllib.request.urlopen(url, timeout=40) as bts:
        s = bts.read().decode('UTF-8')
    res = "https://www.ncbi.nlm.nih.gov/pubmed/" + getXMLfield(s, "Id")
    with urllib.request.urlopen(res, timeout=40) as bts:
        s = bts.read().decode('UTF-8')
    tit = getXMLfield(s, "title")
    res += " -- " + utils.entoru(tit)
    return res
def getArxiv0(q):  # Arxiv.org low
    """Return "<pdf link> - <translated title>" of the first arXiv hit.

    Queries that ``utils.isEnglish`` scores below 0.8 are first translated
    with ``utils.rutoen``; newlines are removed from the query. The first
    ``<entry>`` of the API response supplies the link ("/abs/" replaced
    with "/pdf/") and the title (translated with ``utils.entoru``).

    Returns "Ошибка запроса (" when the response cannot be parsed. Network
    errors propagate to the caller.
    """
    if utils.isEnglish(q) < 0.8:
        q = utils.rutoen(q)
    q = q.replace("\n", "")
    # Unlike Bash, arXiv has a documented API.
    # urlencode([("", q)]) produces "=<quoted q>", completing "search_query".
    url = ("http://export.arxiv.org/api/query?search_query"
           + urllib.parse.urlencode([("", q)]) + "&start=0&max_results=1")
    logD("Original query: " + q)
    # The context manager closes the response even when read()/decode() raises.
    with urllib.request.urlopen(url, timeout=40) as bts:
        s = bts.read().decode('UTF-8')
    try:
        s = s[s.index("<entry>"):s.index("</summary>")]
        link = s[s.index("<id>") + 4:s.index("</id>")].replace(
            "/abs/", "/pdf/")
        desc = utils.entoru(s[s.index("<title>") + 7:s.index("</title>")])
        return link + " - " + desc
    except Exception as e:
        logD(e)
        return "Ошибка запроса ("
def getNekochan():
    """Return the URL of a random picture from the "cat_autism" VK wall.

    URLs are cached in the global ``nekochansurls``. While the cache is
    empty, a batch of 100 wall posts at a random offset is requested and
    the last ``sizes`` entry of every photo attachment is cached. A random
    cached URL is removed from the cache and returned.

    NOTE(review): the loop repeats until at least one URL is found, so it
    does not terminate while the API keeps failing.
    """
    global nekochansurls
    while len(nekochansurls) == 0:
        try:
            j = getJSON("https://api.vk.com/method/wall.get?v=5.87&offset=" +
                        str(random.randint(0, 6000)) +
                        "&domain=cat_autism&count=100&access_token=" +
                        APIkeys.vkToken)
            for i in j['response']['items']:
                try:
                    for k in i['attachments']:
                        try:
                            nekochansurls.append(
                                k['photo']['sizes'][-1]['url'])
                        except (KeyError, IndexError, TypeError):
                            pass  # attachment is not a photo with sizes
                except (KeyError, TypeError):
                    pass  # post without attachments
        except Exception as e:
            logD(e)
    return nekochansurls.pop(random.randint(0, len(nekochansurls) - 1))
def getKurisuArt():
    """Return the URL of a random "#Kurisu" picture from the "steinsgate" VK wall.

    URLs are cached in the global ``kurisuurls``. While the cache is empty,
    a batch of 100 search results at a random offset is requested and the
    last ``sizes`` entry of every photo attachment is cached. A random
    cached URL is removed from the cache and returned.

    NOTE(review): the loop repeats until at least one URL is found, so it
    does not terminate while the API keeps failing.
    """
    global kurisuurls
    while len(kurisuurls) == 0:
        try:
            j = getJSON(
                "https://api.vk.com/method/wall.search?v=5.87&offset=" +
                str(random.randint(0, 203)) +
                "&domain=steinsgate&query=%23Kurisu&count=100&access_token=" +
                APIkeys.vkToken)
            for i in j['response']['items']:
                try:
                    for k in i['attachments']:
                        try:
                            kurisuurls.append(k['photo']['sizes'][-1]['url'])
                        except (KeyError, IndexError, TypeError):
                            pass  # attachment is not a photo with sizes
                except (KeyError, TypeError):
                    pass  # post without attachments
        except Exception as e:
            logD(e)
    return kurisuurls.pop(random.randint(0, len(kurisuurls) - 1))
def say(msg, voice="jane", emotion="neutral", speed=1):
    """Speak ``msg`` aloud and publish the updated log to the web UI.

    The message is appended to the global ``logsarr`` (trimmed to its last
    25 entries) and ``server.answer`` is set to the "start speaking"
    script. The audio is downloaded from the Yandex TTS service with
    ``wget`` and played with ``play``. If preparing or running those
    commands raises, the error is logged and ``espeak`` is used instead.
    Finally ``server.answer`` is set to the "stop speaking" script.

    voice, emotion and speed are passed to the TTS service unchanged.
    """
    global logsarr
    import shlex

    logsarr.append("Kurisu: " + msg)
    if len(logsarr) > 25:
        logsarr = logsarr[-25:]
    server.answer = "hideSplash();startSay();try{setLogs(" + str(
        logsarr) + ");}catch(e){console.log(e);}"
    try:
        # Pronunciation hints for the synthesiser.
        msg = msg.replace("мадей", "мадэй")
        msg = msg.replace("акисэ", "акисэ+")
        msg = msg.replace("ИИ", "И.И.")
        # urlencode([("", msg)]) produces "=<quoted msg>", completing "&text".
        url = ("https://tts.voicetech.yandex.net/generate?key=" +
               APIkeys.YandexSpeechKit + "&text" +
               urllib.parse.urlencode([("", msg)]) +
               "&format=wav&lang=ru-RU&speaker=" + voice + "&speed=" +
               str(speed) + "&emotion=" + emotion)
        # shlex.quote keeps shell metacharacters in the URL from being
        # interpreted by the shell.
        os.system("wget " + shlex.quote(url) +
                  " -O /tmp/yandexspeech.wav 2>/dev/null")
        os.system("play /tmp/yandexspeech.wav speed 1.1 2>/dev/null")
    except Exception as e:
        logD(e)
        # msg may come from external sources; quote it instead of relying
        # on the double-quote replacement alone.
        os.system("espeak -vru+f1 -s 160 " +
                  shlex.quote(msg.replace("\"", " ")) + " 2>/dev/null")
    server.answer = "stopSay();try{setLogs(" + str(
        logsarr) + ");}catch(e){console.log(e);}"
def thequest(q):  # Query thequestion.ru
    """Return the text of the first thequestion.ru answer found for ``q``.

    The search API supplies the URL of the first question. After a random
    pause of 0.84 to 1.84 seconds its page is downloaded and the text
    between the answer markers is extracted; ``&quot;`` entities and
    paragraph/line-break tags are converted to plain text.

    Returns "" (after logging the error) when anything fails.
    """
    if debug:
        logD("Обращение к html thequestion.ru")
    # urlencode([("", q)]) produces "=<quoted q>", completing "&q".
    url = ("https://thequestion.ru/search/questions?limit=2&offset=0&q"
           + urllib.parse.urlencode([("", q)]) + "&sort=date")
    ans = utils.getJSON(url)
    try:
        nurl = ans['items'][0]['absoluteUrl']
        time.sleep(0.84 + random.random() * 1)
        # The context manager closes the response even when read() raises.
        with urllib.request.urlopen(nurl) as bts:
            s = bts.read().decode('UTF-8')
        startq = "class=\"answer__text\"><p>"
        endq = "</p></qml>"
        s = s[s.index(startq) + len(startq):]
        s = s[:s.index(endq)]
        # Fixed: the entity name had been decoded to a bare quote, leaving
        # an unterminated string literal.
        s = s.replace("&quot;", "\"").replace("<p>", "\n").replace(
            "</p>", "").replace("<br>", "\n")
        return s
    except Exception as e:
        logD(e)
        return ""
def sendAnswer(
        self, txt, isClear=False
):  # If isClear is True the message is sent immediately and without checks
    """Send ``txt`` to the interlocutor, split into parts at "/pause".

    An empty text, or a repeat of the previous message whose
    ``utils.getSemanticLoad`` is below 0.8, is not sent and
    ``self.isAnswered`` is set to False. Otherwise the first part is sent
    right away. Every following part is preceded by a "typing"
    notification and a delay of ``timePar0 + timePar1 * len(part)``
    seconds, and is sent with its first letter capitalised. A literal
    backslash-n sequence in the text becomes a real newline.
    """
    if (txt == ""):
        self.isAnswered = False
        logD("Null Answer")
        return
    if (isClear):
        self.__sendf(txt, self.__ident)
        return
    if (self.__lastsent == txt):
        # Repeating the previous message is allowed only when it carries
        # enough meaning.
        if (utils.getSemanticLoad(txt) < 0.8):
            self.isAnswered = False
            logD("Null Answer")
            return
    self.__lastsent = txt
    # TODO: put the text into the feminine gender
    txtarr = txt.replace("\n", "").split("/pause")
    lag = time.time() - self.t1
    self.__debuginfo += "\nВремя поиска ответа: " + str(lag) + " c"
    if (len(txtarr[0]) > 5): self.__typef(self.__ident)
    realanstxt = txtarr[0].replace("/pause", "").replace("\\n", "\n")
    self.__sendf(realanstxt, self.__ident)
    if (len(
            txt.replace("/pause", "").replace("\\n", "").replace(
                " ", "").replace("\n", "")) > 0):
        self.isAnswered = True
        # self.dialsit.append(txt)
    else:
        self.isAnswered = False  # If the real text to send is empty or is only a line break, it is not an answer
        logD("Null Answer")
        return
    for i in txtarr[1:]:
        self.__typef(self.__ident)
        if (i == "/pause" or i == "" or i == "/typing"):  # just a pause
            time.sleep(0.25)
            continue
        sleepytime = self.timePar0 + self.timePar1 * len(i)
        time.sleep(
            min(sleepytime, 5)
        )  # a "typing" status usually lasts 5 seconds (e.g. in Telegram)
        # Renew the typing status every 5 seconds for long delays.
        while (sleepytime > 5):
            self.__typef(self.__ident)
            sleepytime -= 5
            time.sleep(min(sleepytime, 5))
        i = i.replace("/pause", "").replace("/typing",
                                            "").replace("\\n", "\n")
        if (len(i) > 0): i = i[0].upper() + i[1:]
        self.__sendf(i, self.__ident)
def __init__(self, fname):  # fname is an already preprocessed file
    """Load the dialogue lines stored in ``fname``, tolerating a missing file.

    Each line of the file is converted with ``line2dict`` and wrapped in a
    ``dialline``. Lines that fail to convert are logged (the error, then
    the line) and skipped. When the file cannot be opened or read, a
    warning is logged and the lines loaded so far (possibly none) are
    kept.
    """
    self.source = fname
    self.diallines = []  # fresh list for every instance
    try:
        # The context manager closes the file even when reading fails.
        with open(fname, 'r') as fl:
            lns = fl.readlines()
        for i in lns:
            try:
                self.diallines.append(dialline(line2dict(i)))
            except Exception as e:
                logD(e)
                logD(i)
    except Exception:
        logD("Warn: не удалось загрузить файл \"" + self.source +
             "\", игнорируется")
def checkText(s):
    """Return ``s`` with the corrections suggested by Yandex.Speller applied.

    Every reported error carries the misspelled ``word``, its ``pos`` and
    ``len`` in the submitted text, and a list ``s`` of suggestions; the
    first suggestion replaces the word. Errors without suggestions are
    logged and skipped, and the names Курису/Макисэ are never corrected.

    On any failure the error is logged and the text corrected so far is
    returned.
    """
    # urlencode([("", s)]) produces "=<quoted s>", completing "&text".
    url = ("https://speller.yandex.net/services/spellservice.json/checkText?options=4&text"
           + urllib.parse.urlencode([("", s)]))
    try:
        errs = getJSON(url)
        # Fixed: corrections are applied from the end of the text. The
        # reported positions refer to the submitted text, so a correction
        # that changed the length used to shift every later word away from
        # its offset.
        for i in sorted(errs, key=lambda err: err['pos'], reverse=True):
            if len(i['s']) == 0:
                logD("YaSpeller warn: no variants for " + i['word'])
                continue
            pos = i['pos']
            l = i['len']
            fr = i['word']
            if fr in ['Курису', 'курису', 'Макисэ', 'макисэ']:
                continue  # not a mistake
            to = i['s'][0]
            if s[pos:pos + l] == fr:
                s = s[:pos] + to + s[pos + l:]
            else:
                # The offset does not match the word exactly; search a
                # window 1.5 times the word's length, as the original did.
                end = int(pos + l * 1.5)
                s = s[:pos] + s[pos:end].replace(fr, to) + s[end:]
    except Exception as e:
        logD("YaSpeller error: " + str(e))
    logD("Ya.Speller:" + s)
    return s
def getLurkAnswer(inp):  # Same as with Wiki, but with Lurkmore (Russian)
    """Return a short definition of ``inp`` taken from lurkmore.co.

    The search page is downloaded with ``wget`` into /tmp/wikitest.html and
    up to 5000 lines are scanned. The first line holding the description
    meta tag wins. Otherwise a line wins if it mentions the word root
    before a dash, has no "title"/"quote", and its cleaned text is longer
    than four times the root.

    Returns " " when the download raises, and "" when nothing is found or
    an error occurs.
    """
    if debug:
        logD("Обращение к html Лурка")
    try:
        # url="https://lurkmore.co/"+inp
        url = "https://lurkmore.co/index.php?title=Служебная:Search&search=" + inp + "&go=Перейти"
        try:
            # NOTE(review): "-U -q" makes wget take "-q" as the user agent
            # and the real agent string as an extra URL; the command is
            # kept as it was.
            bashExec("wget -O /tmp/wikitest.html -U -q \"" + ua +
                     "\" -p \"" + url + "\" 2>/dev/null")
        except Exception:
            return " "
        meta = '<meta name="description" content="'
        # Fixed: the file is closed on every exit path, including the early
        # returns inside the loop.
        with open("/tmp/wikitest.html") as fl:
            tmp = utils.wordInfo(inp)['root'].lower()
            if len(tmp) == 0:
                tmp = utils.getStartForm(inp).lower()
            lns = ""
            for _ in range(0, 5000):
                try:
                    lns = fl.readline()
                    lnsl = lns.lower()
                    if lnsl.count(meta) > 0:
                        lns = lns[len(meta):lns.rindex(".") + 1]
                        return lns
                    elif (lnsl.count(tmp + " ") + lnsl.count(tmp + "\xa0") +
                          lnsl.count(tmp + "</b>") > 0
                          and lnsl.count("—") > 0
                          and lnsl.count("title") < 1
                          and lnsl.count("quote") < 1):
                        if lns.index("—") - lns.index(tmp) > 0:
                            tmp2 = lns.replace("<p>", "").replace(
                                "<b>", "").replace("</b>", "").replace(
                                    "\" />", "").replace("&#160;", " ")
                            tmp2 = tmp2[tmp2.index(tmp[1:]) -
                                        1:tmp2.rindex(".") + 1]
                            if len(tmp2) > 4 * len(tmp):
                                return tmp2
                except Exception:
                    # A line that cannot be read or parsed is skipped.
                    if debug:
                        logD("Error after line " + lns)
    except Exception as e:
        logD(str(e))
        return ""
    return ""
def getRandAnime():  # Random anime from findanime
    """Return a description of a random anime from findanime.me.

    The random page is downloaded with ``wget`` into /tmp/randanime.html
    and its first 1000 lines are searched for the title, the description
    and the page URL. Each part that is found is appended to the result; a
    missing part is logged and skipped. ``&quot;`` entities are replaced
    with double quotes.

    Raises OSError when the downloaded file cannot be opened.
    """
    try:
        bashExec(
            "wget -O /tmp/randanime.html -U -q \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0\" -p \"http://findanime.me/internal/random\" 2>/dev/null"
        )
    except Exception as e:
        # Best effort: a file left by a previous run may still be usable.
        logD(e)
    lines = []
    # The context manager closes the file even when reading fails.
    with open("/tmp/randanime.html") as fl:
        for _ in range(1000):
            try:
                line = fl.readline()
            except Exception:
                break  # keep what has been read so far
            if not line:
                break  # end of file
            lines.append(line)
    s = "".join(lines)
    res = ""
    try:
        tmp = s.index("<span class='name'>")
        aniname = s[tmp:tmp + 200]
        aniname = aniname[aniname.index(">") + 1:aniname.index("</span>")]
        res += aniname
    except Exception as e:
        logD(e)
    try:
        tmp = s.index("<meta itemprop=\"description\" content=\"")
        descript = s[tmp + 33:tmp + 500]
        descript = descript[descript.index("\"") + 1:descript.index("/>") - 1]
        res += "\n\n" + descript
    except Exception as e:
        logD(e)
    try:
        tmp = s.index("<meta itemprop=\"url\" content=\"")
        url = s[tmp + 25:tmp + 200]
        url = url[url.index("\"") + 1:url.index("/>") - 1]
        res += "\n\nСмотреть онлайн: " + url
    except Exception as e:
        logD(e)
    # Fixed: the entity name had been decoded to a bare quote, leaving an
    # unterminated string literal.
    return res.replace("&quot;", "\"")