def test_find_with_letter_pattern_in_string(self):
    """Each number word embedded in the haystack must be reported as found."""
    haystack = 'onetwothreefourfive'
    for needle in ('one', 'two', 'three', 'four', 'five'):
        assert find(haystack, needle) is True
def test_find_with_number_pattern_in_string(self):
    """Every prefix of the digit string must be reported as found."""
    haystack = '12345'
    for needle in ('1', '12', '123', '1234', '12345'):
        assert find(haystack, needle) is True
def test_find_with_empty_pattern(self):
    """An empty pattern is never found, in a non-empty or an empty string."""
    empty_pattern = ''
    for haystack in ('a', ''):
        assert find(haystack, empty_pattern) is False
def location(bot, update):
    """Reply to a shared location with the hospitals returned by ``finder``.

    A "searching" notice is sent first.  The sender's coordinates are then
    passed to ``finder.find`` and the details of every hit are fetched.  The
    name of the first hit is posted as Markdown, followed by its map pin.
    When there is more than one hit, the handler pauses and then posts a
    Markdown bullet list with the names of the remaining hits.

    Raises:
        IndexError: if ``finder.find`` returns no points of interest.
    """
    message = update.message
    lang = get_lang(update)
    bot.send_message(chat_id=message.chat_id,
                     text=rand_str(lang, "try-find-close-hospital"))

    coords = message.location
    poi = finder.find(lat=coords.latitude, lng=coords.longitude)
    results = [finder.get_details(place.attrib["id"]) for place in poi]

    headline = i18n.t(
        "strings.{}.closest-hospital-is".format(get_lang(update)),
        name=results[0]["name"])
    bot.send_message(chat_id=message.chat_id,
                     parse_mode=telegram.ParseMode.MARKDOWN,
                     text=headline)

    first_hit = poi[0].attrib
    bot.send_location(chat_id=message.chat_id,
                      latitude=first_hit["lat"],
                      longitude=first_hit["lon"])

    if len(results) <= 1:
        return

    # The pause grows by half a second for every additional hit.
    time.sleep(1.5 + 0.5 * (len(results) - 1))
    header = rand_str(get_lang(update), "other-hospital-options") + "\n"
    bullets = "".join(
        "∙ *{}*\n".format(extra["name"]) for extra in results[1:])
    bot.send_message(chat_id=message.chat_id,
                     text=header + bullets,
                     parse_mode=telegram.ParseMode.MARKDOWN)
def query_text(query):
    """Answer an inline query with the data sheet of the requested element.

    The query text is looked up through ``finder.find``.  On success a single
    Markdown article describing the element is sent as the inline answer and
    the element name is appended to ``rate``.  If anything in that path
    raises (for example the lookup result is not subscriptable or a key is
    missing), an "element not found" article is sent instead.
    """
    try:
        data = finder.find(query.query)
        desc = 'Поиск: ' + data["name"]
        text = (
            'Поиск по: ' + query.query + '\n\n*'
            + str(data["name"]) + ' (' + str(data["num"])
            + ' номер в таблице)\n' + '*'
            + 'Русское название: ' + '*' + data["Rname"] + '*'
            + '\nЛатинское название: ' + '*' + data["Lname"] + '*'
            + '\nАнглийское название: ' + '*' + data["Ename"] + '*'
            + '\nДата открытия: ' + '*' + str(data["Birth"]) + '*'
            + '\nЭлектронная оболочка: ' + '*' + data["Eshell"] + '*'
            + '\nСтепень окисления: ' + '*' + data["Doxi"] + '*'
            + '\nАтомная масса: ' + '*' + data["Amass"] + ' (г/моль)' + '*'
            + '\nПлотность: ' + '*' + data["density"] + ' (г/см³)' + '*'
            + '\nТемпература плавления: ' + '*' + data["Tmelting"] + '°C' + '*'
            + '\nТемпература кипения: ' + '*' + data["boil"] + '°C' + '*'
        )
        r = types.InlineQueryResultArticle(
            '1', desc,
            types.InputTextMessageContent(text, parse_mode="MARKDOWN"))
        bot.answer_inline_query(query.id, [r])
        rate.append(data["name"])
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still propagate and can stop the bot.
        er = 'Поиск по: ' + query.query + '\n\nЭлемент не найден!'
        r2 = types.InlineQueryResultArticle(
            '2',
            title='Ошибка!',
            description='Элемент не найден!',
            input_message_content=types.InputTextMessageContent(er))
        bot.answer_inline_query(query.id, [r2])
def getAvailableSwap(self):
    """Return the 'SwapAvailable' entry of /proc/meminfo, or a zero placeholder.

    The output of ``cat /proc/meminfo`` is decoded, split into lines and
    handed to ``find``.  When that yields an empty string,
    ``'SwapAvailable: 0 kB'`` is returned instead.
    """
    raw = subprocess.check_output(['cat', '/proc/meminfo'])
    meminfo_lines = raw.decode('utf-8').split('\n')
    entry = find('SwapAvailable', meminfo_lines)
    if entry == '':
        return 'SwapAvailable: 0 kB'
    return entry
def find_files(args):
    """
    Reads data from files and outputs to files

    Every name produced by ``find(args.glob)`` is opened and passed to
    ``pipe`` together with ``args``.  When ``args.save`` is truthy the output
    goes to a sibling file whose extension is replaced by ``_parsed.csv``;
    otherwise ``None`` is passed as the output.
    """
    for name in find(args.glob):
        basename, _ = os.path.splitext(name)
        new_name = basename + "_parsed.csv"
        output = open(new_name, WRITEABLE) if args.save else None
        try:
            # Close the input as soon as pipe() is done with it instead of
            # leaking one descriptor per matched file.
            with open(name) as source:
                pipe(args, source, output)
        finally:
            if output is not None:
                output.close()
def analyze(args):
    """
    Reads data from files and outputs to files

    Every name produced by ``find(args.glob)`` is opened and passed to
    ``pipe`` together with ``args``.  When ``args.save`` is truthy the output
    goes to a sibling file whose extension is replaced by ``_parsed.csv``;
    otherwise ``None`` is passed as the output.
    """
    for name in find(args.glob):
        basename, _ = os.path.splitext(name)
        new_name = basename + "_parsed.csv"
        output = open(new_name, WRITEABLE) if args.save else None
        try:
            # Close the input as soon as pipe() is done with it instead of
            # leaking one descriptor per matched file.
            with open(name) as source:
                pipe(args, source, output)
        finally:
            if output is not None:
                output.close()
def rater(message):
    """Tell an admin how often an element has been searched for.

    The second space-separated token of the message is the argument.  The
    argument ``777`` dumps the whole ``rate`` list; any other argument is
    looked up with ``finder.find`` and the number of times its name occurs
    in ``rate`` is reported.  Non-admin senders get a refusal message.

    Raises:
        IndexError: if the message text has no second token.
    """
    if message.from_user.id not in admin:
        bot.send_message(message.chat.id, 'Вы не уполномочены!')
        return

    x = message.text.split(' ')[1]
    if x == '777':
        # x is a string, so the former comparison with the integer 777 could
        # never succeed.  The list is rendered with str() because the message
        # text must be a string.
        # NOTE(review): returning here assumes 777 is purely a "dump" command
        # and should not also be looked up as an element - confirm.
        bot.send_message(message.chat.id, str(rate))
        return

    data = finder.find(x)
    if data is None:
        bot.send_message(message.chat.id, 'Такого элемента нет')
        return

    y = rate.count(data["name"])
    text = data["name"] + ' - искалось ' + str(y) + ' раз'
    bot.send_message(message.chat.id, text)
def finds(message):
    """Reply to a text message with the data sheet of the requested element.

    The message text is looked up with ``finder.find``.  If nothing is found
    a "does not exist" reply is sent.  Otherwise the element name is appended
    to ``rate``, a picture of the element is sent on a best-effort basis,
    the Markdown data sheet follows, and finally a promotional message with
    the rate/group/channel inline keyboard is sent.
    """
    keyboard = types.InlineKeyboardMarkup()
    rateMe = types.InlineKeyboardButton(
        text="Оценить!",
        url="https://telegram.me/storebot?start=zChemistryBot")
    joinG = types.InlineKeyboardButton(text='Группа!',
                                       url="t.me/zChemistryGroup")
    joinB = types.InlineKeyboardButton(text='Канал!',
                                       url="t.me/zChemistryBlog")
    keyboard.add(rateMe, joinG, joinB)

    choice = message.text
    data = finder.find(choice)
    if data is None:
        bot.send_message(message.chat.id, 'Такого не существует!')
        return

    rate.append(data["name"])
    try:
        # The context manager closes the picture even when sending fails.
        with open(
                elements_1[choice]['/home/zchemistry/Chem/' + data["link"]],
                'rb') as photo:
            bot.send_photo(message.chat.id, photo)
    except Exception as error:
        # The picture is optional: report the problem and carry on.
        print(error)

    text = (
        '*' + str(data["name"]) + ' (' + str(data["num"])
        + ' номер в таблице)\n' + '*'
        + 'Русское название: ' + '*' + data["Rname"] + '*'
        + '\nЛатинское название: ' + '*' + data["Lname"] + '*'
        + '\nАнглийское название: ' + '*' + data["Ename"] + '*'
        + '\nДата открытия: ' + '*' + str(data["Birth"]) + '*'
        + '\nЭлектронная оболочка: ' + '*' + data["Eshell"] + '*'
        + '\nСтепень окисления: ' + '*' + data["Doxi"] + '*'
        + '\nАтомная масса: ' + '*' + data["Amass"] + ' (г/моль)' + '*'
        + '\nПлотность: ' + '*' + data["density"] + ' (г/см³)' + '*'
        + '\nТемпература плавления: ' + '*' + data["Tmelting"] + '°C' + '*'
        + '\nТемпература кипения: ' + '*' + data["boil"] + '°C' + '*'
    )
    bot.send_message(message.chat.id, text, parse_mode="MARKDOWN")
    bot.send_message(
        message.chat.id,
        'Понравилось? ставь самую высокую оценку.\nА также присоединяйся к нам!',
        reply_markup=keyboard,
        disable_web_page_preview=True)
def main():
    """Score word-level language detection against the annotated dev set.

    Each line of ``./BanglaEnglish_FIRE2013_AnnotatedDev.txt`` is split on
    whitespace into tokens made of a word, a backslash and a tag; the first
    character of the tag is the gold label.  Every word is classified with
    ``finder.find``.  A word counts as correct when the verdict is
    "Bengali word" and the gold label is ``B``, or the verdict is
    "English word" and the gold label is ``E``.  For every line the line
    number followed by the majority language (``e`` or ``b``) is printed;
    the overall accuracy is printed at the end.

    Raises:
        IndexError: if a token has no non-empty tag after a backslash.
        ZeroDivisionError: if the file contains no tokens at all.
    """
    word_tot = 0
    corr = 0
    # The context manager closes the corpus even if classification fails.
    with open("./BanglaEnglish_FIRE2013_AnnotatedDev.txt", 'r') as fin:
        for lno, sent in enumerate(fin, start=1):
            words = []
            lang = []
            for elem in sent.split():
                elem = elem.split('\\')
                lang.append(elem[1][0])
                words.append(elem[0])

            type_map = {}
            type_count = defaultdict(int)
            for word in words:
                type_word = finder.find(word)
                type_map[word] = type_word
                type_count[type_word] += 1

            if type_count["English word"] > type_count["Bengali word"]:
                default = "e"
            else:
                default = "b"
            print(str(lno) + default)

            for word, gold in zip(words, lang):
                detected = type_map[word]
                if ((detected == "Bengali word" and gold == 'B')
                        or (detected == "English word" and gold == 'E')):
                    corr += 1
                word_tot += 1

    print("The accuracy is: " + str(corr * 100 / word_tot) + "%")
def main():
    """Score word-level language detection against the annotated dev set.

    Each line of ``./BanglaEnglish_FIRE2013_AnnotatedDev.txt`` is split on
    whitespace into tokens made of a word, a backslash and a tag; the first
    character of the tag is the gold label.  Every word is classified with
    ``finder.find``.  A word counts as correct when the verdict is
    "Bengali word" and the gold label is ``B``, or the verdict is
    "English word" and the gold label is ``E``.  For every line the line
    number followed by the majority language (``e`` or ``b``) is printed;
    the overall accuracy is printed at the end.

    Raises:
        IndexError: if a token has no non-empty tag after a backslash.
        ZeroDivisionError: if the file contains no tokens at all.
    """
    word_tot = 0
    corr = 0
    # The context manager closes the corpus even if classification fails.
    with open("./BanglaEnglish_FIRE2013_AnnotatedDev.txt", 'r') as fin:
        for lno, sent in enumerate(fin, start=1):
            words = []
            lang = []
            for elem in sent.split():
                elem = elem.split('\\')
                lang.append(elem[1][0])
                words.append(elem[0])

            type_map = {}
            type_count = defaultdict(int)
            for word in words:
                type_word = finder.find(word)
                type_map[word] = type_word
                type_count[type_word] += 1

            if type_count["English word"] > type_count["Bengali word"]:
                default = "e"
            else:
                default = "b"
            print(str(lno) + default)

            for word, gold in zip(words, lang):
                detected = type_map[word]
                if ((detected == "Bengali word" and gold == 'B')
                        or (detected == "English word" and gold == 'E')):
                    corr += 1
                word_tot += 1

    print("The accuracy is: " + str(corr * 100 / word_tot) + "%")
def output(input_json):
    '''
    Find the module to calculate the output,
    'ask4' should be the module name.

    Returns an error dict when 'ask4' is missing.  Otherwise dots in the
    requested name are replaced by underscores, the worker is looked up via
    finder.find(Path_of_Workers, name) and called with input_json; a falsy
    worker result is replaced by an empty dict.  If the lookup or the worker
    raises, the exception is printed and noop_service(input_json) is
    returned instead.
    '''
    if 'ask4' not in input_json:
        # The key must be the string 'error'; the bare name was undefined
        # and raised NameError instead of returning the error dict.
        return {'error': "no ask4 no done anything"}

    asked = input_json['ask4'].replace('.', '_')
    try:
        main = finder.find(Path_of_Workers, asked)
        return main(input_json) or {}
    except Exception as e:
        print('except exception:')
        print(e)
        # noop: no operation
        return noop_service(input_json)
def getModelName(self):
    """Return what ``find`` yields for 'Model name' in the ``lscpu`` output."""
    raw = subprocess.check_output(['lscpu'])
    lscpu_lines = raw.decode('utf-8').split('\n')
    return find('Model name', lscpu_lines)
def getFreeSwap(self):
    """Return what ``find`` yields for 'SwapFree' in /proc/meminfo."""
    raw = subprocess.check_output(['cat', '/proc/meminfo'])
    meminfo_lines = raw.decode('utf-8').split('\n')
    return find('SwapFree', meminfo_lines)
def location(bot, update):
    """Log a shared location, send an empty-text reply and query ``finder``.

    The result of ``finder.find`` is not used by this handler.
    """
    message = update.message
    print(message.location)
    bot.send_message(chat_id=message.chat_id, text="")
    coords = message.location
    finder.find(lat=coords.latitude, lng=coords.longitude)
def getTotalMemory(self):
    """Return what ``find`` yields for 'MemTotal' in /proc/meminfo."""
    raw = subprocess.check_output(['cat', '/proc/meminfo'])
    meminfo_lines = raw.decode('utf-8').split('\n')
    return find('MemTotal', meminfo_lines)
def test_find_with_none_pattern(self):
    """find() must return False when the pattern is None."""
    outcome = find('', None)
    assert outcome is False
def test_find_with_empty_string(self):
    """find() must return False when searching an empty string."""
    outcome = find('', 'one')
    assert outcome is False
def test_find_with_none_string(self):
    """find() must return False when the string to search is None."""
    outcome = find(None, 'one')
    assert outcome is False
def main():
    """Interactively tag the words of one sentence as English or Bengali.

    The sentence read from standard input is split on whitespace and every
    word is classified with ``finder.find``.  The per-word verdicts, the
    English/Bengali totals and the majority language (``e`` or ``b``) are
    printed.  Then every word that was tagged "English word" but for which
    ``beng.beng_word`` returns 1 is re-examined against its neighbours (up
    to two words on either side): it is re-tagged "Bengali word" when
    Bengali neighbours outnumber English ones, or merely tie with them if
    the sentence default is ``b``.  Corrections are applied in order, so a
    re-tagged word influences the windows of the words after it.  The final
    verdicts are printed at the end.
    """
    sentence = input("Please enter your sentence: ")
    words = sentence.split()

    type_map = defaultdict(str)
    type_count = defaultdict(int)
    for word in words:
        type_word = finder.find(word)
        type_map[word] = type_word
        type_count[type_word] += 1

    for word in words:
        print(word + ": " + type_map[word])
    print("Type count of English: " + str(type_count["English word"]))
    print("Type count of Bengali: " + str(type_count["Bengali word"]))

    if type_count["English word"] > type_count["Bengali word"]:
        default = "e"
    else:
        default = "b"
    print(default)

    total = len(words)
    for i, word in enumerate(words):
        if not (beng.beng_word(word) == 1
                and type_map[word] == "English word"):
            continue

        # Clamp the +/-2 window to the sentence.  The former start-of-sentence
        # branch iterated range(i + 3) unconditionally and raised IndexError
        # for sentences shorter than i + 3 words.
        first = max(0, i - 2)
        last = min(total, i + 3)
        bengali = 0
        english = 0
        for j in range(first, last):
            if j == i:
                continue
            neighbour_type = type_map[words[j]]
            if neighbour_type == "Bengali word":
                bengali += 1
            elif neighbour_type == "English word":
                english += 1

        # A tie only flips the word when the sentence default is Bengali.
        if default == "e":
            flip = bengali > english
        else:
            flip = bengali >= english
        if flip:
            type_map[word] = "Bengali word"

    for word in words:
        print(word + ": " + type_map[word])
def main():
    """Tag every word of ``./beng_corpus.txt`` and write both tag files.

    For each line, characters that are neither word characters nor
    whitespace are removed and the rest is split into words.  Every word is
    classified with ``finder.find`` and written to
    ``./predicted_tags_eng.txt`` as the word, a backslash, a one-letter tag
    (``E``, ``B`` or ``N``) and a space.  The line number followed by the
    majority language (``e`` or ``b``) is printed.  Words tagged
    "English word" for which ``beng.beng_word`` returns 1 are then
    re-examined against their neighbours and possibly re-tagged
    "Bengali word"; the resulting tags are written in the same format to
    ``./corrected_tags_eng.txt``.  No line breaks are written to either
    output file.
    """
    tag_for = {"English word": "E", "Bengali word": "B"}

    # One with-statement closes all three files, even if tagging fails.
    with open("./beng_corpus.txt", 'r') as fin, \
            open("./predicted_tags_eng.txt", 'w') as fout_pred, \
            open("./corrected_tags_eng.txt", 'w') as fout_corr:
        for lno, sent in enumerate(fin, start=1):
            words = re.sub(r'[^\w\s]', '', sent).split()

            type_map = defaultdict(str)
            type_count = defaultdict(int)
            for word in words:
                type_word = finder.find(word)
                type_map[word] = type_word
                type_count[type_word] += 1
                fout_pred.write(
                    word + "\\" + tag_for.get(type_word, "N") + " ")

            if type_count["English word"] > type_count["Bengali word"]:
                default = "e"
            else:
                default = "b"
            print(str(lno) + default)

            total = len(words)
            for i, word in enumerate(words):
                if (beng.beng_word(word) == 1
                        and type_map[word] == "English word"):
                    first = max(0, i - 2)
                    # NOTE(review): for every position except the last two
                    # words of a longer sentence, the window is clamped to
                    # total - 1, so the final word is never counted as a
                    # neighbour there.  This looks like an off-by-one
                    # (total was probably intended) but is kept unchanged so
                    # the tagging output stays the same - confirm.
                    if i <= 1 or i < total - 2:
                        last = min(total - 1, i + 3)
                    else:
                        last = total

                    bengali = 0
                    english = 0
                    for j in range(first, last):
                        if j == i:
                            continue
                        neighbour_type = type_map[words[j]]
                        if neighbour_type == "Bengali word":
                            bengali += 1
                        elif neighbour_type == "English word":
                            english += 1

                    # A tie only flips the word when the default is Bengali.
                    if default == "e":
                        flip = bengali > english
                    else:
                        flip = bengali >= english
                    if flip:
                        type_map[word] = "Bengali word"

                fout_corr.write(
                    word + "\\" + tag_for.get(type_map[word], "N") + " ")
def main():
    """Measure detection accuracy before and after neighbour correction.

    Each line of ``./BanglaEnglish_FIRE2013_AnnotatedDev.txt`` is split on
    whitespace into tokens made of a word, a backslash and a tag; the first
    character of the tag is the gold label.  Every word is classified with
    ``finder.find``, the verdict is printed and compared with the gold
    label for the uncorrected score.  Words tagged "English word" for which
    ``beng.beng_word`` returns 1 are then re-examined against their
    neighbours and possibly re-tagged "Bengali word"; the corrected verdict
    is printed next to the gold label and scored.  Both accuracies are
    printed at the end.

    Raises:
        IndexError: if a token has no non-empty tag after a backslash.
        ZeroDivisionError: if the file contains no tokens at all.
    """
    tag_for = {"Bengali word": "B", "English word": "E"}
    word_tot = 0
    corr = 0
    init_corr = 0

    # The context manager closes the corpus even if classification fails.
    with open("./BanglaEnglish_FIRE2013_AnnotatedDev.txt", 'r') as fin:
        for lno, sent in enumerate(fin, start=1):
            words = []
            lang = []
            for elem in sent.split():
                elem = elem.split('\\')
                lang.append(elem[1][0])
                words.append(elem[0])

            type_map = defaultdict(str)
            type_count = defaultdict(int)
            # Pair each word with its own gold label.  Looking the label up
            # through words.index(word) scored every repeated word against
            # the label of its first occurrence.
            for word, gold in zip(words, lang):
                type_word = finder.find(word)
                type_map[word] = type_word
                type_count[type_word] += 1
                print(str(word) + "(Detect:" + str(type_map[word]) + ")")
                if ((type_word == "Bengali word" and gold == 'B')
                        or (type_word == "English word" and gold == 'E')):
                    init_corr += 1

            if type_count["English word"] > type_count["Bengali word"]:
                default = "e"
            else:
                default = "b"
            print(str(lno) + default)

            total = len(words)
            for i, word in enumerate(words):
                if (beng.beng_word(word) == 1
                        and type_map[word] == "English word"):
                    first = max(0, i - 2)
                    # NOTE(review): for every position except the last two
                    # words of a longer sentence, the window is clamped to
                    # total - 1, so the final word is never counted as a
                    # neighbour there.  This looks like an off-by-one
                    # (total was probably intended) but is kept unchanged so
                    # the reported accuracy stays comparable - confirm.
                    if i <= 1 or i < total - 2:
                        last = min(total - 1, i + 3)
                    else:
                        last = total

                    bengali = 0
                    english = 0
                    for j in range(first, last):
                        if j == i:
                            continue
                        neighbour_type = type_map[words[j]]
                        if neighbour_type == "Bengali word":
                            bengali += 1
                        elif neighbour_type == "English word":
                            english += 1

                    # A tie only flips the word when the default is Bengali.
                    if default == "e":
                        flip = bengali > english
                    else:
                        flip = bengali >= english
                    if flip:
                        type_map[word] = "Bengali word"

                det = tag_for.get(type_map[word], "N")
                print(
                    str(word) + "(Orig:" + str(lang[i]) + " Detect:"
                    + str(det) + ")")
                if ((type_map[word] == "Bengali word" and lang[i] == 'B')
                        or (type_map[word] == "English word"
                            and lang[i] == 'E')):
                    corr += 1
                word_tot += 1

    print("The uncorrected accuracy is: " + str(init_corr * 100 / word_tot)
          + "%")
    print("The accuracy is: " + str(corr * 100 / word_tot) + "%")
def select(self, query_=None, table=None, limit=0):
    """Run ``find`` over this object's data and return the result.

    ``find`` is called with the instance's name and data plus ``table``,
    ``query_`` and ``limit``, and is expected to return a
    ``(data, error)`` pair.

    Raises:
        Exception: wrapping the error element when it is truthy.
    """
    outcome = find(self.__name, self.__data, table, query_, limit)
    rows, failure = outcome
    if not failure:
        return rows
    raise Exception(failure)
def getArchitecture(self):
    """Return what ``find`` yields for 'Architecture' in the ``lscpu`` output."""
    raw = subprocess.check_output(['lscpu'])
    lscpu_lines = raw.decode('utf-8').split('\n')
    return find('Architecture', lscpu_lines)
def find():
    """Search the indexed files for the ``term`` request argument.

    Returns the result of ``finder.find`` serialised as a JSON string.
    """
    search_term = request.args.get('term')
    return json.dumps(finder.find(indexed_files, search_term))
def test_find_with_letter_pattern_not_in_string(self):
    """Number words absent from the haystack must be reported as not found."""
    haystack = 'onetwothreefourfive'
    for needle in ('six', 'seven'):
        assert find(haystack, needle) is False
def main():
    """Interactively tag the words of one sentence as English or Bengali.

    The sentence read from standard input is split on whitespace and every
    word is classified with ``finder.find``.  The per-word verdicts, the
    English/Bengali totals and the majority language (``e`` or ``b``) are
    printed.  Then every word that was tagged "English word" but for which
    ``beng.beng_word`` returns 1 is re-examined against its neighbours (up
    to two words on either side): it is re-tagged "Bengali word" when
    Bengali neighbours outnumber English ones, or merely tie with them if
    the sentence default is ``b``.  Corrections are applied in order, so a
    re-tagged word influences the windows of the words after it.  The final
    verdicts are printed at the end.
    """
    sentence = input("Please enter your sentence: ")
    words = sentence.split()

    type_map = defaultdict(str)
    type_count = defaultdict(int)
    for word in words:
        type_word = finder.find(word)
        type_map[word] = type_word
        type_count[type_word] += 1

    for word in words:
        print(word + ": " + type_map[word])
    print("Type count of English: " + str(type_count["English word"]))
    print("Type count of Bengali: " + str(type_count["Bengali word"]))

    if type_count["English word"] > type_count["Bengali word"]:
        default = "e"
    else:
        default = "b"
    print(default)

    total = len(words)
    for i, word in enumerate(words):
        if not (beng.beng_word(word) == 1
                and type_map[word] == "English word"):
            continue

        # Clamp the +/-2 window to the sentence.  The former start-of-sentence
        # branch iterated range(i + 3) unconditionally and raised IndexError
        # for sentences shorter than i + 3 words.
        first = max(0, i - 2)
        last = min(total, i + 3)
        bengali = 0
        english = 0
        for j in range(first, last):
            if j == i:
                continue
            neighbour_type = type_map[words[j]]
            if neighbour_type == "Bengali word":
                bengali += 1
            elif neighbour_type == "English word":
                english += 1

        # A tie only flips the word when the sentence default is Bengali.
        if default == "e":
            flip = bengali > english
        else:
            flip = bengali >= english
        if flip:
            type_map[word] = "Bengali word"

    for word in words:
        print(word + ": " + type_map[word])
def test_find_with_number_pattern_not_in_string(self):
    """Digit patterns absent from the haystack must be reported as not found."""
    haystack = '12345'
    for needle in ('6', '67'):
        assert find(haystack, needle) is False
def example_handler(bot, update):
    """Reply to the sender with whatever ``finder.find`` returns for the text."""
    incoming = update.message
    bot.send_message(incoming.chat_id, text=finder.find(incoming.text))
def test_find_with_any_pattern_in_string(self):
    """Patterns made of punctuation and a literal backslash are matched verbatim."""
    # Raw strings keep the backslash literal without relying on the invalid
    # escape sequence "\y", which newer Python versions warn about.
    string = r'asdn\y of23h01148n+)&^%#@xc'
    assert find(string, '8n+)&') is True
    assert find(string, '8n+)&A') is False
    assert find(string, 'A8n+)&') is False
    assert find(string, r'\y ') is True
def main():
    """Tag every word of ``./beng_corpus.txt`` and write both tag files.

    For each line, characters that are neither word characters nor
    whitespace are removed and the rest is split into words.  Every word is
    classified with ``finder.find`` and written to
    ``./predicted_tags_eng.txt`` as the word, a backslash, a one-letter tag
    (``E``, ``B`` or ``N``) and a space.  The line number followed by the
    majority language (``e`` or ``b``) is printed.  Words tagged
    "English word" for which ``beng.beng_word`` returns 1 are then
    re-examined against their neighbours and possibly re-tagged
    "Bengali word"; the resulting tags are written in the same format to
    ``./corrected_tags_eng.txt``.  No line breaks are written to either
    output file.
    """
    tag_for = {"English word": "E", "Bengali word": "B"}

    # One with-statement closes all three files, even if tagging fails.
    with open("./beng_corpus.txt", 'r') as fin, \
            open("./predicted_tags_eng.txt", 'w') as fout_pred, \
            open("./corrected_tags_eng.txt", 'w') as fout_corr:
        for lno, sent in enumerate(fin, start=1):
            words = re.sub(r'[^\w\s]', '', sent).split()

            type_map = defaultdict(str)
            type_count = defaultdict(int)
            for word in words:
                type_word = finder.find(word)
                type_map[word] = type_word
                type_count[type_word] += 1
                fout_pred.write(
                    word + "\\" + tag_for.get(type_word, "N") + " ")

            if type_count["English word"] > type_count["Bengali word"]:
                default = "e"
            else:
                default = "b"
            print(str(lno) + default)

            total = len(words)
            for i, word in enumerate(words):
                if (beng.beng_word(word) == 1
                        and type_map[word] == "English word"):
                    first = max(0, i - 2)
                    # NOTE(review): for every position except the last two
                    # words of a longer sentence, the window is clamped to
                    # total - 1, so the final word is never counted as a
                    # neighbour there.  This looks like an off-by-one
                    # (total was probably intended) but is kept unchanged so
                    # the tagging output stays the same - confirm.
                    if i <= 1 or i < total - 2:
                        last = min(total - 1, i + 3)
                    else:
                        last = total

                    bengali = 0
                    english = 0
                    for j in range(first, last):
                        if j == i:
                            continue
                        neighbour_type = type_map[words[j]]
                        if neighbour_type == "Bengali word":
                            bengali += 1
                        elif neighbour_type == "English word":
                            english += 1

                    # A tie only flips the word when the default is Bengali.
                    if default == "e":
                        flip = bengali > english
                    else:
                        flip = bengali >= english
                    if flip:
                        type_map[word] = "Bengali word"

                fout_corr.write(
                    word + "\\" + tag_for.get(type_map[word], "N") + " ")
def main():
    """Measure detection accuracy before and after neighbour correction.

    Each line of ``./BanglaEnglish_FIRE2013_AnnotatedDev.txt`` is split on
    whitespace into tokens made of a word, a backslash and a tag; the first
    character of the tag is the gold label.  Every word is classified with
    ``finder.find``, the verdict is printed and compared with the gold
    label for the uncorrected score.  Words tagged "English word" for which
    ``beng.beng_word`` returns 1 are then re-examined against their
    neighbours and possibly re-tagged "Bengali word"; the corrected verdict
    is printed next to the gold label and scored.  Both accuracies are
    printed at the end.

    Raises:
        IndexError: if a token has no non-empty tag after a backslash.
        ZeroDivisionError: if the file contains no tokens at all.
    """
    tag_for = {"Bengali word": "B", "English word": "E"}
    word_tot = 0
    corr = 0
    init_corr = 0

    # The context manager closes the corpus even if classification fails.
    with open("./BanglaEnglish_FIRE2013_AnnotatedDev.txt", 'r') as fin:
        for lno, sent in enumerate(fin, start=1):
            words = []
            lang = []
            for elem in sent.split():
                elem = elem.split('\\')
                lang.append(elem[1][0])
                words.append(elem[0])

            type_map = defaultdict(str)
            type_count = defaultdict(int)
            # Pair each word with its own gold label.  Looking the label up
            # through words.index(word) scored every repeated word against
            # the label of its first occurrence.
            for word, gold in zip(words, lang):
                type_word = finder.find(word)
                type_map[word] = type_word
                type_count[type_word] += 1
                print(str(word) + "(Detect:" + str(type_map[word]) + ")")
                if ((type_word == "Bengali word" and gold == 'B')
                        or (type_word == "English word" and gold == 'E')):
                    init_corr += 1

            if type_count["English word"] > type_count["Bengali word"]:
                default = "e"
            else:
                default = "b"
            print(str(lno) + default)

            total = len(words)
            for i, word in enumerate(words):
                if (beng.beng_word(word) == 1
                        and type_map[word] == "English word"):
                    first = max(0, i - 2)
                    # NOTE(review): for every position except the last two
                    # words of a longer sentence, the window is clamped to
                    # total - 1, so the final word is never counted as a
                    # neighbour there.  This looks like an off-by-one
                    # (total was probably intended) but is kept unchanged so
                    # the reported accuracy stays comparable - confirm.
                    if i <= 1 or i < total - 2:
                        last = min(total - 1, i + 3)
                    else:
                        last = total

                    bengali = 0
                    english = 0
                    for j in range(first, last):
                        if j == i:
                            continue
                        neighbour_type = type_map[words[j]]
                        if neighbour_type == "Bengali word":
                            bengali += 1
                        elif neighbour_type == "English word":
                            english += 1

                    # A tie only flips the word when the default is Bengali.
                    if default == "e":
                        flip = bengali > english
                    else:
                        flip = bengali >= english
                    if flip:
                        type_map[word] = "Bengali word"

                det = tag_for.get(type_map[word], "N")
                print(
                    str(word) + "(Orig:" + str(lang[i]) + " Detect:"
                    + str(det) + ")")
                if ((type_map[word] == "Bengali word" and lang[i] == 'B')
                        or (type_map[word] == "English word"
                            and lang[i] == 'E')):
                    corr += 1
                word_tot += 1

    print("The uncorrected accuracy is: " + str(init_corr * 100 / word_tot)
          + "%")
    print("The accuracy is: " + str(corr * 100 / word_tot) + "%")