Exemplo n.º 1
0
def detect_language(source):
    """
    Identify the language of ``source`` and return the matching Language.

    The text is passed to ``Translator.detect_lang`` (credentials are read
    from ``settings``) and the code it reports is looked up with
    ``Language.objects.get(bingcode=...)``.
    """
    translator = Translator(
        settings.MS_TRANSLATOR_CLIENT_ID,
        settings.MS_TRANSLATOR_CLIENT_SECRET,
    )
    detected_code = translator.detect_lang(source)
    return Language.objects.get(bingcode=detected_code)
Exemplo n.º 2
0
def translate(source, lang_from, lang_to):
    """
    Translate ``source`` into the language described by ``lang_to``.

    ``lang_from`` may be falsy (for example ``None``); in that case ``None``
    is passed to the translator as the source language code. Both language
    objects are expected to expose a ``bingcode`` attribute.
    """
    source_code = None
    if lang_from:
        source_code = lang_from.bingcode
    translator = Translator(
        settings.MS_TRANSLATOR_CLIENT_ID,
        settings.MS_TRANSLATOR_CLIENT_SECRET,
    )
    return translator.translate(source, source_code, lang_to.bingcode)
Exemplo n.º 3
0
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` from ``langfrom`` to ``langto``.

    Uses the module-level ``client_id`` / ``client_secret`` credentials and
    returns the translated phrase encoded as UTF-8.
    """
    result = Translator(client_id, client_secret).translate(
        text_message, lang_from=langfrom, lang_to=langto)
    return result.encode('utf8')
Exemplo n.º 4
0
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` from ``langfrom`` to ``langto``.

    Credentials are read from the ``microsoft`` section of ``config``; the
    translated phrase is returned encoded as UTF-8.
    """
    credentials = [
        config.get('microsoft', option)
        for option in ('client_id', 'client_secret')
    ]
    translated = Translator(*credentials).translate(
        text_message, lang_from=langfrom, lang_to=langto)
    return translated.encode('utf8')
Exemplo n.º 5
0
def makeFrenchQuery(myQuery):
    """Translate each term of ``myQuery`` from English to French.

    The query is split with ``parseQuery``; the result is the list of
    translated terms in the same order.
    """
    terms = parseQuery(myQuery)
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    translator = Translator('myPythonTranslate64',
                            'YqNxIMdOAoaF5Am+/BT84sdQ1q7ZCNA1stU0viWcGi4=')
    return [
        translator.translate(term, lang_from='en', lang_to='fr')
        for term in terms
    ]
Exemplo n.º 6
0
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` and return the result as UTF-8 bytes.

    The translator credentials come from the ``microsoft`` section of
    ``config``; ``langfrom`` / ``langto`` are forwarded as the source and
    target language.
    """
    translator = Translator(
        config.get('microsoft', 'client_id'),
        config.get('microsoft', 'client_secret'),
    )
    return translator.translate(
        text_message, lang_from=langfrom, lang_to=langto).encode('utf8')
Exemplo n.º 7
0
    def analyse_entry(self, entry, params):
        """Translate the entry's text and yield the entry with the result.

        Reads the text from ``entry['nif:isString']`` and the ``lang``,
        ``lang_to`` and ``key`` values from ``params``; stores the
        translation under ``entry['output']`` and yields the same entry.
        """
        source_text = entry['nif:isString']
        source_lang, target_lang, api_key = (
            params.get(name) for name in ("lang", "lang_to", "key"))
        # NOTE(review): this prints every parameter, including the API key.
        print(params)
        translated = Translator(api_key).translate(source_text,
                                                   lang_from=source_lang,
                                                   lang_to=target_lang)
        entry['output'] = {"nif:isString": translated}

        yield entry
Exemplo n.º 8
0
 def get_translation(to_translate):
     """Translate English text to French and anything else to English.

     Returns a string of the form ``"<from> -> <to>: <translation>"``, or
     ``None`` (after printing the error) when anything goes wrong.
     """
     try:
         # NOTE(review): credentials are hard-coded here.
         client = Translator('dorsalfunbot', 'lWJjt3W86DqQX5J+VGCDsvD3LU9/eZFvG0VQj4k6J/Y=')
         from_lang = client.detect_lang(to_translate)
         to_lang = 'fr' if from_lang == 'en' else 'en'
         header = '{} -> {}:'.format(from_lang, to_lang)
         translated = client.translate(to_translate, lang_from=from_lang, lang_to=to_lang)
     except Exception as e:
         print("Translation error: {}".format(e))
         return None
     return '{} {}'.format(header, translated)
def _checkTitle(title):
    """Return the detected language of ``title``.

    A key is picked at random from ``key_choices`` for the translator.
    ``IndexError`` and ``ValueError`` are swallowed and yield ``""``.
    """
    try:
        detector = Translator(random.choice(key_choices))
        return detector.detect_lang([title])
    except (IndexError, ValueError):
        return ""
Exemplo n.º 10
0
def translate(texts: Union[str, List[str]],
              target_language: Lang = Lang.en) -> List[str]:
    """Microsoft translation implementation, see `brain.feature.translate`.

    A value that is not a list is wrapped into a one-element list. Emits a
    ``DeprecationWarning`` on every call and raises ``Exception`` when there
    is nothing to return (empty input list).
    """
    warnings.warn("Azure is not used as translation service anymore",
                  DeprecationWarning,
                  stacklevel=2)
    batch = texts if isinstance(texts, list) else [texts]
    # todo: reuse enum from language_detect
    client = Translator(_KEY)
    translations = []
    for text in batch:
        translations.append(
            client.translate(text, lang_to=target_language.name))
    if not translations:
        raise Exception('no translations')

    return translations
Exemplo n.º 11
0
def engine():
    """Answer the question posted in the ``question`` form field.

    The question is translated from Arabic ('ar') to English ('en'), sent to
    the Quepy demo service to obtain a SPARQL query, and that query is run
    against DBpedia. The outcome is rendered with ``answer.html``.
    """
    question = request.form['question']

    # NOTE(review): translator credentials are hard-coded here.
    translator = Translator('emad_punk123456',
                            'R0go6LNQEj3CVh7nhyHw/DLenWLuQNjyjdhnZ0okRGE=')
    translated_question = translator.translate(question,
                                               lang_from='ar',
                                               lang_to='en')
    lower_q = translated_question.lower()

    # The Quepy online app is only used as a quick demo of question analysis.
    # NOTE(review): `url` is bound only for a non-empty translation; with an
    # empty one the request below fails because the name is unbound. The
    # question is also appended without URL encoding.
    if translated_question != "":
        url = "http://quepy.machinalis.com/engine/get_query?question=" + lower_q

    # Fetch the generated queries as JSON and keep the first SPARQL query.
    payload = requests.get(url).json()
    query = payload['queries'][0]['query']

    # Run the query on DBpedia and convert the results from JSON.
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    template_context = {
        'query': query,
        'question': lower_q,
        'results': results,
        # The original question split into words.
        'q2': question.split(' '),
        'url': url,
    }
    return render_template('answer.html', **template_context)
Exemplo n.º 12
0
    def translate(self, textToTranslate, languageList):
        """Translate text through a chain of languages and back to English.

        Starting from English, the text is translated into each language of
        ``languageList`` in turn, and finally back into English.

        :param textToTranslate: the English text to send through the chain.
        :param languageList: iterable of ``Language`` members to pass through.
        :return: the text after the final translation back to English.
        :raises ValueError: if ``textToTranslate`` is not a string or any
            item of ``languageList`` is not a ``Language``.
        """
        if not isinstance(textToTranslate, str):
            raise ValueError("The argument textToTranslate must be string")

        # Validate the whole chain before any API call, so an invalid entry
        # late in the list does not waste translation requests.
        languages = list(languageList)
        for language in languages:
            if not isinstance(language, Language):
                raise ValueError(
                    "You must pass only valid values from Languages enum")

        # NOTE(review): credentials are hard-coded here.
        translator = Translator(
            'GeorgiKaradjov', 'Y2c414NBMQlVgVPZK7vmFT7WZ/DJ4sKRYsTxG9NAXlQ=')
        tempTranslation = textToTranslate
        languageFrom = Language.English
        for language in languages:
            tempTranslation = translator.translate(
                tempTranslation,
                lang_from=languageFrom.value,
                lang_to=language.value)
            languageFrom = language

        # Close the loop: translate from the last language back to English.
        return translator.translate(tempTranslation,
                                    lang_from=languageFrom.value,
                                    lang_to=Language.English.value)
Exemplo n.º 13
0
    def run(self):
        """Translate ``self.options['source_text']`` with the chosen backend.

        ``self.source`` selects the backend ("gtranslateweb", "mstranslator"
        or "yandex"). Any exception is turned into an ``'[Error] ...'``
        response. Unless ``self.aborted`` is set, the result is emitted
        through ``self.finished`` together with the source name.
        """
        response = ''
        try:
            source = self.source
            if source == "gtranslateweb":
                query = {
                    'sl': self.options['source_language'],
                    'tl': self.options['target_language'],
                    'q': self.options['source_text']
                }
                page = requests.get(
                    'https://translate.google.com/m',
                    params=query,
                    headers={
                        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7',
                    })
                soup = BeautifulSoup(page.text, "lxml")
                # The last <div> whose first class is 't0' wins.
                for div in soup.find_all('div'):
                    if div.has_attr('class') and div['class'][0] == 't0':
                        response = div.string
            elif source == "mstranslator":
                self.translator = Translator(self.mstranslate_key)
                response = self.translator.translate(
                    self.options['source_text'],
                    lang_from=self.options['source_language'],
                    lang_to=self.options['target_language'])
            elif source == "yandex":
                self.translator = YandexTranslate(self.yandex_key)
                text = self.options['source_text']
                direction = (self.options['source_language'] + "-" +
                             self.options['target_language'])
                response = self.translator.translate(text, direction)['text'][0]
        except Exception as e:
            response = '[Error] ' + str(e)
        if self.aborted:
            return
        self.finished.emit(self.source, response)
Exemplo n.º 14
0
import json
from mstranslator import Translator

# Load the tweet data; the ``with`` block closes the file on exit, so no
# explicit close() call is needed.
with open("all_translated_tweets.json") as f:
    d = json.load(f)


def translate_tweets(translator, tweets):
    """Return the English translation of every tweet, in input order.

    Each tweet is passed to ``translator.translate`` with ``lang_to='en'``.
    """
    return [translator.translate(tweet, lang_to='en') for tweet in tweets]


# NOTE(review): the API key is hard-coded here.
translator = Translator('8a050fdaf6b74b22af7589f7261a3f0a')

country = "Colombia"
for tweets in d[country][1]:
    # Per group: count English and non-English tweets, and give up on the
    # group once more than five non-English tweets have been seen.
    all_eng = True
    count = eng_count = 0
    for each in tweets:
        print(country, count, eng_count)
        if translator.detect_lang(each) == 'en':
            eng_count += 1
            continue
        count += 1
        if count > 5:
            all_eng = False
            break
Exemplo n.º 15
0
import wikipedia
from mstranslator import Translator
# NOTE(review): the API key is hard-coded here.
translator = Translator("8dc97e03193b4674b160cfe2b07c8223")
print(translator.translate('Привет, мир!', lang_from='ru', lang_to='en'))

# Fetch the "machine learning" page twice: once with the wikipedia language
# set to "en" and once with it set to "fr", keeping both contents.
wikipedia.set_lang("en")
mlen = wikipedia.page("machine learning")
en_content = mlen.content
wikipedia.set_lang("fr")
mlfr = wikipedia.page("machine learning")
fr_content = mlfr.content
Exemplo n.º 16
0
def translate_2(text, source_language_code, target_language_code):
	"""Translate ``text`` from the source to the target language code."""
	# NOTE(review): credentials are hard-coded here.
	credentials = (
		"irlab-pan-author-obfuscation-16",
		"TWGKKcKQ/VoASe1EksnZBNNVA8mlThBsPQ/5z7Wqkkk=",
	)
	return Translator(*credentials).translate(
		text, lang_from=source_language_code, lang_to=target_language_code)
from nltk import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.corpus import stopwords
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO
from shutil import move
from collections import Counter

from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument

# Absolute path of the '../data' directory, resolved against the current
# working directory.
dataPath = os.path.abspath(os.path.relpath('../data'))
# Shared translator client; 'KEY' looks like a placeholder credential —
# TODO confirm.
translator = Translator('KEY')


class LanguageDetection():
    """Helpers working on the PDF files stored below ``dataPath``."""

    def __init__(self):
        pass

    def _getNumPages(self, id):
        """Return the page count of ``pdf/tocheck/<id>.pdf`` under ``dataPath``.

        :param id: identifier used to build the file name ``<id>.pdf``.
        :return: the number of pages, or ``""`` when the file cannot be
            opened or read.
        """
        try:
            pdf_path = os.path.join(dataPath, 'pdf/tocheck', str(id) + '.pdf')
            # The context manager closes the file handle; it was previously
            # left open on every call.
            with open(pdf_path, 'rb') as pdf_handle:
                return PyPDF2.PdfFileReader(pdf_handle).getNumPages()
        except Exception:
            # Best effort: callers receive "" instead of an exception.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return ""
Exemplo n.º 18
0
 def setUp(self):
     """Create a Translator and a TranslatorMock from the same credentials."""
     credentials = (client_id, client_secret)
     self.translator = Translator(*credentials)
     self.translator_mock = TranslatorMock(*credentials)
Exemplo n.º 19
0
import dicttoxml
from flask import abort, jsonify, Response
import json
import time
from tweet_tracker_api.job_management import job
from mstranslator import Translator
from tweet_tracker_api.MongoDBFacade import MongoDBFacade
from flask import session
import logging

# Module-level collection handles; both start as None and are assigned by
# setup().
tweets = None
ram_tweets = None
# NOTE(review): the translator credential is hard-coded here.
translator = Translator('4+9RqJt9le3aEYoc6sfoYDgTkMy+xXVUL7g4U9Nrz6w=')


def setup(collection, ram_collection):
    """ This function allows server.py to set up the collection.

    :param collection: The MongoDB collection object to get tweets from.
    """
    global tweets
    global ram_tweets
    tweets = collection
    ram_tweets = ram_collection


from bson import ObjectId
from bson import json_util


class JSONEncoder(json.JSONEncoder):
def request_image(window, keyword, num_of_try=0, translate=True):
    """
    Queries Bing for images and retries up to 5 times if the randomly selected image could not be accessed
    :param window:
        not used by this function itself; passed on unchanged when retrying
    :param keyword:
        string which specifies the image content
    :param num_of_try:
        internal parameter that increases if the selected image could not be retrieved (e.g. Forbidden Error)
    :param translate:
        Should the keyword be translated to english before the search? (may increase result size)
    :return:
        The image data in bytes, or None if keyword is None or no image could be retrieved
    """

    if keyword is None:
        return None
    if translate:
        # Read the key inside a context manager so the file handle is closed.
        with open('../ms.key') as key_file:
            ms_key = key_file.read()
        trans = Translator('__RealTimeStoryIllustrator__', ms_key)
        translatedkw = trans.translate(keyword, lang_from='de', lang_to='en')
        print("IMAGE SERVICE: Getting image for " + str(keyword) + ". Searched for the english translation '" +
              str(translatedkw) + "'.")
    else:
        translatedkw = keyword
        print("IMAGE SERVICE: Getting image for " + str(keyword) + ".")

    if num_of_try > 5:  # no images were found
        logger.error("IMAGE SERVICE: Could not find an image after 5 tries for " + str(translatedkw) + ".")
        return None

    try:
        params = {'$format': 'json', '$top': 10, 'ImageFilters': '\'Size:Small\''}
        with open('../bing.key') as key_file:
            bing_key = key_file.read()
        api = BingSearchAPI(bing_key)
        result = api.search_image(str(translatedkw+'+AND+(illustration+OR+clipart)'), params)
        # Parse the response once instead of once per access.
        images = result.json()['d']['results']
        # With no results randint raises ValueError, which triggers a retry below.
        img_num = random.randint(0, len(images) - 1)
        # Close the HTTP response once the image bytes have been read.
        with urllib.request.urlopen(images[img_num]['MediaUrl'], timeout=2) as response:
            return response.read()
    except Exception as e:  # have to catch everything since socket exceptions seem to be broken
        print("ERROR in IMAGE SERVICE: Trying again, request was denied "+str(e))
        return request_image(window, keyword, num_of_try + 1, translate=translate)
Exemplo n.º 21
0
import os
import csv
from mstranslator import Translator

# NOTE(review): credentials are hard-coded here.
CLIENT_ID = 'NLP'
CLIENT_SECRET = '7I2PWW9SJDRNj72bIjTqh7xOn7eke+rPNDx94JUKJEA='


# Use the CLIENT_ID constant rather than repeating its literal value.
translator = Translator(CLIENT_ID, CLIENT_SECRET)
files_in_dir = os.listdir("Data/Raw/")

# For every file in Data/Raw/, translate the first CSV column of each row
# from 'hi' to 'en' and write the results, one per row, to a file of the
# same name in Data/Translated/.
for datum in files_in_dir:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Translating " + datum)
    # Context managers close both files even when a translation fails.
    with open("Data/Translated/" + datum, 'w') as out_file:
        with open("Data/Raw/" + datum, 'r') as raw_file:
            # The .decode call expects byte strings from csv (Python 2).
            allposts = [
                translator.translate(line[0].decode("utf-8"), lang_from='hi', lang_to='en')
                for line in csv.reader(raw_file, delimiter=',')
            ]
        csv.writer(out_file).writerows([[post] for post in allposts])

print("Translation complete")
Exemplo n.º 22
0
from langdetect import detect
from mongodb.connection import getMongoConnection
import config
import sys
from mstranslator import Translator
# Module-level translator client.
# NOTE(review): the API key is hard-coded here.
translator = Translator('c64707b4ecb74bd7b4a78ead5fa7b708')


def translate(company_name):
    conn = getMongoConnection()
    db = conn[config.Mongo_DB_NAME]
    print db
    print "something yar"
    while db[company_name].find({"translated": True}).count() > 0:
        job = db[company_name].find_one({"translated": True})
        lan = detect(job['snippet'])
        if str(lan) != 'en':
            for key in job:
                if key == "city" or key == "snippet":
                    job[key] = translator.translate(job[key], lang_to="en")
                    db[company_name].update({"url": job["url"]},
                                            {"$set": {
                                                key: job[key]
                                            }},
                                            upsert=False,
                                            multi=False)
                if key == "job_summary":
                    flag = 0
                    try:
                        job[key] = translator.translate(unicode(
                            job[key], "utf-8"),
Exemplo n.º 23
0
	def run(self, options):
		"""Build the suggestions table for a segment and emit it as HTML.

		The table is emitted through ``self.finished`` once with the
		occurrences, previous text and translation-memory matches, and a
		second time with a machine translation row appended when the
		translator returns a result. Clearing ``self.running`` makes the
		method return early at several checkpoints.
		"""
		self.running = True

		suggestions_html = '<table border="0.5" cellspacing="0" cellpadding="2" width="100%" style="border-color:gray;">'

		if self.tm_source_segments_cache is None:
			self.refresh_tm()

		if not self.running:
			return

		matching_segments = []
		if self.tm_source_segments_cache:
			matching_segments = db_op.get_translation_memory(self.tm_path, self.tm_source_segments_cache, options['target_language'], options['source_text'], 60)

		if not self.running:
			return

		occurrences = options['context']
		if occurrences:
			suggestions_html += (
				'<tr>'
				'<td valign="middle"><img src="images/code_white_24dp.svg"></td>'
				'<td><font color="gray">Occurrences (first 4):</font>'
			)
			for position, occurrence in enumerate(occurrences):
				if position >= 4:
					break
				suggestions_html += '<br>' + occurrence[0] + ':' + occurrence[1]
			suggestions_html += '</td></tr>'

		previous_text = options['previous_text']
		if previous_text and previous_text != '':
			suggestions_html += (
				'<tr>'
				'<td valign="middle"><img src="images/undo_white_24dp.svg"></td>'
				'<td><font color="gray">Previous text:</font><br>'
				+ html.escape(previous_text)
				+ '</td></tr>'
			)

		# One row per translation-memory match, at most self.limit rows.
		for shown, row in enumerate(matching_segments, start=1):
			suggestions_html += (
				'<tr>'
				'<td valign="middle"><img src="images/storage_white_24dp.svg"></td>'
				'<td><font color="gray">TM match (' + str(row[0]) + '%):</font><br>'
				+ html.escape(row[1])
				+ '<br><font color="gray">Translated text:</font><br>'
				+ html.escape(row[2])
				+ '</td></tr>'
			)
			if shown >= self.limit:
				break
			if not self.running:
				return
		suggestions_html += '</table>'
		self.finished.emit(suggestions_html)

		# Machine translation
		api_key = QtCore.QSettings("Babelruins.org", "BlackCAT").value('plugins_mstranslate_api_key', "")
		translator = Translator(api_key)
		try:
			mst_response = translator.translate(options['source_text'], options['source_language'], options['target_language'])
			if mst_response:
				# Drop the trailing '</table>' so one more row can be added.
				closing_tag = '</table>'
				suggestions_html = (
					suggestions_html[:-len(closing_tag)]
					+ '<tr>'
					'<td valign="middle"><img src="images/computer_white_24dp.svg"></td>'
					'<td><font color="gray">Microsoft Translate:</font><br>'
					+ html.escape(mst_response)
					+ '</td></tr>'
					+ closing_tag
				)
				if not self.running:
					return
				self.finished.emit(suggestions_html)
		except Exception as e:
			print(str(e))
Exemplo n.º 24
0
 def dict_mapping(word, key=key_user):
     """Translate ``word`` from language code 'en' to language code 'th'.

     ``key`` is the translator key; it defaults to the value ``key_user``
     had when this function was defined.
     """
     return Translator(key).translate(word, lang_from='en', lang_to='th')
import goslate
import sqlite3

# Open the SQLite database at this relative path and create the cursor used
# by the statements below.
conn = sqlite3.connect('../database/twitter.sqlite')
cur = conn.cursor()

from mstranslator import Translator
# 'XXXXXXXXXXXX' looks like a redacted placeholder for the API key —
# TODO confirm.
translator = Translator('XXXXXXXXXXXX')

#cur.executescript('ALTER TABLE followers ADD loc_ind INTEGER;')
#cur.execute('UPDATE followers SET loc_ind = 0')

# Translate the location of every row in t2 whose loc_ind is not 1, one row
# at a time, marking each row as processed (loc_ind = 1) and committing.
while True:
    cur.execute('SELECT id, location FROM t2 WHERE loc_ind <> 1')
    row = cur.fetchone()
    if row is None:
        print("All text has been translated")
        break
    row_id, location = row
    try:
        translated = translator.translate(location, lang_to='en')
    except Exception as exc:
        # Keep the stored location and still mark the row as processed so
        # the loop moves on. The old handler wrote `val` here, which was
        # undefined on a first failure and otherwise held the previous
        # row's translation.
        print("Translation failed for id {}: {}".format(row_id, exc))
        cur.execute('UPDATE t2 SET loc_ind = 1 WHERE id = ?', (row_id, ))
    else:
        cur.execute('UPDATE t2 SET location = ?, loc_ind = 1 WHERE id = ?',
                    (translated, row_id))
    conn.commit()
cur.close()
Exemplo n.º 26
0
def languageDetection():
    # sql = 'select id from resolved_papers where downloaded = 1 and npages >= 5 and pdf2text = 1 and english = 0 and id in (12,	70,	74,	77,	92,	108,	110,	111,	113,	127,	128,	129,	133,	136,	145,	149,	151,	189,	210,	223,	238,	247,	253,	276,	287,	289,	291,	292,	303,	308,	345,	346,	347,	349,	350,	351,	354,	355,	359,	360,	361,	362,	363,	364,	365,	368,	377,	381,	389,	393,	395,	406,	414,	424,	439,	446,	448,	549,	554,	558,	574,	577,	578,	579,	581,	582,	583,	585,	588,	589,	591,	592,	595,	597,	601,	604,	605,	609,	613,	621,	625,	682,	684,	712,	713,	714,	715,	716,	717,	719,	722,	723,	724,	726,	730,	731,	732,	734,	735,	738,	739,	740,	743,	749,	751,	752,	753,	754,	755,	758,	765,	782,	787,	816,	822,	830,	836,	851,	857,	860,	861,	869,	882,	970,	1044,	1045,	1047,	1050,	1052,	1055,	1056,	1057,	1058,	1060,	1061,	1062,	1063,	1064,	1065,	1066,	1068,	1069,	1072,	1073,	1074,	1075,	1076,	1079,	1080,	1083,	1084,	1086,	1087,	1089,	1094,	1100,	1104,	1105,	1106,	1115,	1116,	1117,	1122,	1124,	1125,	1126,	1131,	1133,	1142,	1143,	1146,	1150,	1151,	1172,	1174,	1176,	1184,	1194,	1248,	1283,	1301,	1307,	1309,	1367,	1381,	1417,	1419,	1452,	1456,	1482,	1491,	1507,	1511,	1513,	1522,	1542,	1562,	1585,	1587,	1591,	1624,	1626,	1628,	1652,	1687,	1688,	1689,	1692,	1693,	1694,	1696,	1698,	1699,	1701,	1704,	1710,	1711,	1714,	1716,	1719,	1720,	1727,	1728,	1730,	1745,	1750,	1751,	1755,	1757,	1770,	1809,	1815,	1820,	1831,	1835,	1872,	1884,	1887,	1898,	1935,	1955,	1993,	2009,	2025,	2026,	2029,	2030,	2031,	2199,	2241,	2244,	2246,	2275,	2276,	2277,	2278,	2279,	2305,	2323,	2324,	2325,	2327,	2328,	2347,	2360,	2402,	2404,	2410,	2415,	2442,	2448,	2450,	2451,	2452,	2461,	2462,	2467,	2477,	2509,	2510,	2512,	2513,	2518,	2522,	2524,	2531,	2543,	2547,	2554,	2555,	2576,	2577,	2578,	2579,	2580,	2583,	2586,	2605,	2609,	2624,	2629,	2646,	2651,	2652,	2653,	2655,	2656,	2659,	2661,	2662,	2671,	2676,	2677,	2756,	2757,	2758,	2760,	2761,	2762,	2768,	2771,	2772,	2773,	2774,	2776,	2777,	2781,	2782,	2783,	2786,	2789,	2790,	2791,	2792,	
2793,	2794,	2795,	2798,	2811,	2815,	2822,	2869,	2884,	2907,	2913,	2920,	2924,	3029,	3127,	3141,	3146,	3172,	3173,	3174,	3175,	3176,	3177,	3178,	3180,	3182,	3183,	3184,	3185,	3189,	3192,	3194,	3198,	3199,	3202,	3203,	3207,	3208,	3211,	3223,	3224,	3230,	3236,	3252,	3253,	3262,	3275,	3302,	3305,	3316,	3365,	3388,	3389,	3391,	3392,	3396,	3397,	3398,	3399,	3400,	3401,	3402,	3405,	3406,	3408,	3409,	3412,	3415,	3416,	3418,	3419,	3420,	3421,	3422,	3423,	3424,	3425,	3426,	3427,	3428,	3431,	3432,	3433,	3436,	3438,	3439,	3443,	3444,	3445,	3446,	3450,	3452,	3455,	3456,	3458,	3461,	3466,	3467,	3470,	3503,	3526,	3532,	3536,	3538,	3541,	3542,	3543,	3549,	3563,	3573,	3597,	3598,	3620,	3626,	3662,	3819,	3921,	3922,	3923,	3925,	3927,	3931,	3932,	3933,	3934,	3935,	3936,	3937,	3938,	3939,	3940,	3942,	3943,	3944,	3945,	3948,	3950,	3952,	3953,	3954,	3955,	3957,	3958,	3959,	3960,	3961,	3963,	3965,	3966,	3967,	3968,	3971,	3972,	3980,	3988,	3995,	4000,	4005,	4011,	4039,	4043,	4046,	4048,	4050,	4059,	4077,	4086,	4089,	4098,	4101,	4104,	4109,	4111,	4123,	4127,	4170,	4184,	4203,	4215,	4221,	4235,	4287,	4295,	4345,	4362,	4367,	4448,	4449,	4451,	4452,	4453,	4454,	4455,	4457,	4458,	4459,	4460,	4461,	4462,	4463,	4464,	4465,	4466,	4467,	4468,	4469,	4470,	4472,	4478,	4480,	4481,	4482,	4496,	4500,	4504,	4508,	4513,	4518,	4523,	4524,	4548,	4551,	4567,	4572,	4598,	4607,	4608,	4611,	4657,	4786,	4788,	4789,	4791,	4792,	4793,	4794,	4795,	4796,	4797,	4798,	4799,	4804,	4805,	4811,	4815,	4817,	4819,	4829,	4839,	4840,	5037,	5038,	5040,	5047,	5179,	5191,	5192,	5210,	5248,	5249,	5266,	5275,	5276,	5322,	5323,	5327,	5330,	5362,	5410,	5411,	5416,	5451,	5462,	5493,	5494,	5496,	5519,	5536,	5548,	5555,	5587,	5588,	5589,	5590,	5591,	5594,	5599,	5617,	5633,	5636,	5660,	5667,	5695,	5697,	5701,	5702,	5706,	5767,	5768,	5769,	5773,	5778,	5786,	5831,	5832,	5833,	5835,	5836,	5837,	5839,	5844,	5849,	5850,	5858,	5860,	5889,	5901,	5915,	5916,	5918,	5920,	5991,	5992,	5993,	5994,	5995,	6009,	6045,	6079,	6080,	6081,	6083,	6084,	
6085,	6086,	6087,	6100,	6101,	6107,	6185,	6249,	6278,	6279,	6280,	6281,	6282,	6283,	6285,	6305,	6306,	6387,	6393,	6396,	6397,	6398,	6411,	6439,	6498,	6505,	6511,	6513,	6518,	6520,	6524,	6525,	6526,	6527,	6532,	6543,	6553,	6555,	6565,	6566,	6569,	6573,	6574,	6581,	6585,	6601,	6605,	6606,	6612,	6615,	6617,	6621,	6645,	6646,	6648,	6651,	6652,	6658,	6660,	6667,	6672,	6676,	6682,	6684,	6688,	6690,	6692,	6693,	6700,	6704,	6743,	6769,	6771,	6772,	6775,	6778,	6783,	6785,	6789,	6793,	6818,	6824,	6829,	6830,	6834,	6839,	6845,	6846,	6849,	6850,	6855,	6859,	6866,	6873,	6878,	6887,	6888,	6889,	6890,	6907,	6926,	6945,	6948,	6954,	6963,	7006,	7066,	7082,	7102,	7121,	7162,	7163,	7271,	7272,	7273,	7285,	7314,	7315,	7350,	7362,	7364,	7398,	7441,	7442,	7443,	7444,	7446,	7451,	7454,	7456,	7462,	7464,	7504,	7515,	7516,	7547,	7548,	7634,	7659,	7660,	7661,	7662,	7663,	7664,	7665,	7672,	7776,	7777,	7783,	7784,	7788,	7789,	7792,	7795,	7797,	7798,	7799,	7809,	7831,	7889,	7917,	7918,	7920,	7926,	7930,	7932,	7933,	7935,	7936,	7941,	7944,	7960,	7962,	7971,	8008,	8017,	8070,	8075,	8076,	8110,	8111,	8112,	8117,	8120,	8128,	8129,	8130,	8133,	8136,	8140,	8143,	8144,	8145,	8148,	8149,	8150,	8153,	8154,	8159,	8163,	8203,	8225,	8268,	8270,	8302,	8310,	8312,	8419,	8421,	8496,	8497,	8498,	8500,	8505,	8506,	8507,	8508,	8510,	8513,	8517,	8533,	8543,	8584,	8710,	8717,	8718,	8719,	8720,	8721,	8722,	8724,	8726,	8730,	8732,	8733,	8734,	8737,	8739,	8740,	8741,	8742,	8743,	8744,	8745,	8747,	8748,	8750,	8751,	8752,	8753,	8754,	8755,	8756,	8757,	8759,	8761,	8764,	8766,	8768,	8769,	8773,	8774,	8775,	8784,	8811,	8817,	9042,	9056,	9207,	9219,	9240,	9249,	9273,	9318,	9322,	9422,	9457,	9485,	9562,	9623,	9647,	9836,	9837,	9922,	10067,	10068,	10069,	10168,	10185,	10288,	10400,	10401,	10513,	10515,	10606,	10700,	10702,	10703,	10771,	10772,	10819,	10821,	10927,	11019,	11056,	11113,	11142,	11143,	11225,	11226,	11227,	11343,	11361,	11362,	11364,	11377,	11448,	11460,	11461,	11462,	11463,	11465,	11466,	11468,	11493,	11609,	
11610,	11611,	11617,	11638,	11659,	11718,	11748,	11749,	11750,	11751,	11762,	11821,	11850,	11891,	11898,	11911,	11913,	11914,	11915,	11916,	11917,	11918,	11919,	11920,	11921,	11922,	11923,	11926,	11928,	11934,	11955,	11980,	12026,	12030,	12044,	12092,	12093,	12094,	12095,	12096,	12098,	12100,	12101,	12102,	12103,	12104,	12105,	12106,	12107,	12108,	12109,	12110,	12111,	12112,	12113,	12114,	12122,	12123,	12125,	12144,	12147,	12234,	12235,	12237,	12256,	12305,	12339,	12346,	12407,	12448,	12511,	12665,	12705,	12706,	12708,	12709,	12710,	12711,	12712,	12713,	12714,	12716,	12717,	12718,	12719,	12720,	12721,	12722,	12725,	12729,	12742,	12753,	12762,	12802,	12813,	12816,	12821,	12823,	12843,	12856,	12905,	12907,	13006,	13061,	13062,	13063,	13137,	13138,	13198,	13329,	13330,	13331,	13332,	13494,	13495,	13582,	13583,	13584,	13585,	13586,	13697,	13833,	13834,	13835,	13836,	13837,	13840,	14160,	14161,	14200,	14341,	14342,	14343,	14590,	14591,	14597,	14610,	14614,	14631,	14632,	14633,	14634,	14635,	14650,	14655,	14656,	14689,	14726,	14777,	14870,	14871,	14872,	14921,	14922,	14923,	14991,	14992,	14993,	14994,	14995,	15136,	15137,	15138,	15139,	15140,	15141,	15142,	15143,	15152,	15216,	15265,	15277,	15387,	15388,	15483,	15546,	15550,	15587,	15590,	15623,	15641,	15653,	15711,	15712,	15730,	15743,	15763,	15794,	15805,	15821,	15831,	15884,	15932,	16039,	16122,	16124,	16153,	16175,	16181,	16220,	16233,	16264,	16277,	16306,	16361,	16377,	16391,	16392,	16393,	16402,	16404,	16431,	16439,	16440,	16444,	16447,	16448,	16455,	16457,	16463,	16468,	16513,	16524,	16528,	16551,	16569,	16594,	16596,	16600,	16610,	16647,	16648,	16718,	16731,	16763,	16765,	16794,	16795,	16899,	16948,	16962,	16993,	16998,	17011,	17013,	17034,	17061,	17062,	17141,	17142,	17143,	17144,	17155,	17158,	17248,	17262,	17263,	17264,	17265,	17266,	17333,	17334,	17335,	17395,	17396,	17398,	17400,	17401,	17405,	17410,	17412,	17417,	17420,	17431,	17547,	17584,	17585,	17587,	17599,	17674,	17676,	17677,	17679,	17711,	17719,	
17749,	17750,	17751,	17752,	17753,	17754,	17756,	17757,	17811,	17812,	17814,	17948,	17963,	17964,	17965,	17989,	17998,	18083,	18139,	18145,	18165,	18229,	18230,	18257,	18264,	18273,	18321,	18322,	18323,	18351,	18515,	18548,	18599,	18600,	18623,	18637,	18675,	18676,	18687,	18698,	18736,	18753,	18768,	18792,	18794,	18797,	18823,	18828,	18830,	18850,	18851,	18853,	18854,	18857,	18882,	18885,	18886,	18887,	18888,	18891,	18892,	18893,	18894,	18898,	18901,	18904,	18930,	18947,	18967,	18968,	18970,	18972,	18973,	18974,	18976,	18977,	18980,	18982,	18983,	18984,	18985,	18986,	18991,	19006,	19059,	19060,	19061,	19062,	19064,	19066,	19067,	19069,	19071,	19103,	19104,	19110,	19116,	19153,	19180,	19181,	19186,	19263,	19272,	19273,	19280,	19318,	19409,	19425,	19428,	19456,	19528,	19531,	19538,	19606,	19607,	19609,	19610,	19612,	19613,	19616,	19623,	19636,	19647,	19648,	19685,	19798,	19799,	19800,	19801,	19802,	19805,	19806,	19807,	19808,	19811,	19812,	19813,	19816,	19820,	19821,	19836,	19874,	19875,	19878,	19960,	19985,	20051,	20052,	20053,	20054,	20055,	20056,	20057,	20058,	20059,	20061,	20062,	20063,	20064,	20065,	20066,	20069,	20070,	20071,	20072,	20074,	20078,	20079,	20081,	20084,	20088,	20090,	20110,	20156,	20157,	20168,	20189,	20193,	20245,	20344,	20345,	20346,	20347,	20348,	20349,	20350,	20353,	20354,	20355,	20356,	20357,	20358,	20359,	20360,	20361,	20362,	20363,	20365,	20368,	20370,	20371,	20373,	20374,	20377,	20391,	20392,	20396,	20398,	20400,	20444,	20476,	20520,	20682,	20685,	20687,	20688,	20689,	20690,	20691,	20692,	20693,	20694,	20695,	20698,	20699,	20700,	20701,	20702,	20703,	20707,	20709,	20714,	20728,	20760,	20774,	20864,	20865,	20866,	20867,	20868,	20869,	20870,	20872,	20874,	20899,	20909,	20962,	21041,	21042,	21117,	21118,	21121,	21139,	21146,	21227,	21271,	21272,	21273,	21274,	21275,	21425,	21430,	21493,	21505,	21507,	21510,	21513,	21612,	21616,	21621,	21622,	21623,	21624,	21667,	21675,	21751,	21765,	21766,	21767,	21846,	21847,	21856,	21857,	21858,	21871,	
21872,	21873,	21875,	21876,	21877,	21881,	21883,	21885,	21924,	21925,	21957,	21977,	21978,	21979,	21980,	21984,	21985,	21993,	21997,	21999,	22001,	22031,	22033,	22082,	22113,	22175,	22228,	22247,	22271,	22272,	22371,	22374,	22462,	22463,	22613,	22694,	22695,	22696,	22697,	22700,	22880,	22881,	22882,	22883,	22884,	22901,	22977,	22978,	22979,	22981,	23030,	23032,	23191,	23230,	23236,	23238,	23291,	23340,	23453,	23552,	23553,	23744,	23761,	23774,	24016,	24025,	24037,	24085,	24090,	24096,	24125,	24126,	24128,	24129,	24130,	24132,	24133,	24140,	24141,	24142,	24145,	24150,	24151,	24152,	24153,	24155,	24168,	24169,	24170,	24171,	24172,	24173,	24174,	24181,	24186,	24187,	24189,	24190,	24192,	24193,	24206,	24207,	24208,	24209,	24210,	24211,	24212,	24213,	24214,	24239,	24243,	24244,	24246,	24247,	24249,	24250,	24251,	24252,	24253,	24254,	24255,	24256,	24257,	24258,	24261,	24290,	24297,	24298,	24299,	24300,	24301,	24302,	24303,	24304,	24305,	24307,	24308,	24315,	24326,	24330,	24334,	24335,	24336,	24350,	24364,	24365,	24366,	24367,	24368,	24371,	24372,	24390,	24391,	24393,	24405,	24406,	24408,	24411,	24412,	24413,	24415,	24438,	24439,	24440,	24473,	24474,	24476,	24477,	24478,	24479,	24480,	24481,	24483,	24484,	24485,	24486,	24487,	24520,	24522,	24523,	24524,	24525,	24526,	24527,	24528,	24529,	24530,	24531,	24532,	24533,	24535,	24536,	24537,	24540,	24541,	24542,	24543,	24544,	24545,	24546,	24547,	24549,	24550,	24576,	24586,	24621,	24622,	24623,	24624,	24625,	24626,	24627,	24628,	24629,	24630,	24631,	24632,	24633,	24634,	24635,	24636,	24637,	24638,	24639,	24640,	24641,	24642,	24644,	24645,	24646,	24647,	24648,	24651,	24652,	24653,	24654,	24655,	24656,	24657,	24712,	24713,	24714,	24715,	24716,	24717,	24719,	24720,	24721,	24722,	24723,	24724,	24731,	24775,	24795,	24812,	24831,	24833,	24835,	24836,	24845,	24846,	24851,	24869,	24877,	24888,	24889,	24907,	24926,	24952,	25091,	25169,	25177,	25178,	25195,	25206,	25247,	25248,	25251,	25267,	25340,	25345,	25455,	25456,	25460,	25464,	
25754,	25822,	25845,	25865,	25890,	25891,	25893,	25914,	25975,	25976,	25978,	25980,	25982,	25986,	25996,	26003,	26074,	26112,	26143,	26172,	26182,	26183,	26186,	26194,	26202,	26283,	26284,	26287,	26289,	26293,	26303,	26316,	26320,	26322,	26463,	26465,	26467,	26469,	26476,	26481,	26486,	26489,	26497,	26596,	26663,	26678,	26717,	27136,	27183,	27307,	27340,	27341,	27342,	27344,	27348,	27355,	27607,	27608,	27609,	27610,	27623,	27635,	27641,	27922,	27937,	28165,	28263,	28277,	28422,	28433,	28437,	28508,	28738,	28739,	28740,	28743,	28748,	28820,	28990,	28993,	28997,	29008,	29009,	29010,	29011,	29079,	29084,	29090,	29093,	29101,	29102,	29104,	29105,	29106,	29112,	29113,	29114,	29119,	29120,	29122,	29123,	29124,	29125,	29129,	29130,	29133,	29134,	29135,	29137,	29139,	29146,	29147,	29172,	29174,	29176,	29184,	29191,	29192,	29194,	29200,	29201,	29203,	29221,	29224,	29225,	29226,	29232,	29234,	29258,	29265,	29268,	29273,	29274,	29275,	29276,	29277,	29278,	29280,	29281,	29282,	29300,	29301,	29302,	29310,	29313,	29314,	29315,	29316,	29320,	29382,	29435,	29436,	29454,	29457,	29458,	29468,	29469,	29470,	29473,	29475,	29476,	29477,	29481,	29482,	29483,	29485,	29500,	29501,	29503,	29504,	29505,	29508,	29513,	29515,	29524,	29532,	29533,	29534,	29535,	29537,	29549,	29553,	29556,	29561,	29574,	29618,	29634,	29635,	29637,	29639,	29665,	29666,	29668,	29669,	29672,	29682,	29693,	29709,	29710,	29711,	29717,	29741,	29742,	29746,	29747,	29752,	29753,	29755,	29756,	29759,	29804,	29805,	29832,	29998,	30003,	30005,	30006,	30007,	30009,	30019,	30025,	30040,	30074,	30075,	30077,	30078,	30080,	30082,	30083,	30084,	30290,	30291,	30293,	30349,	30350,	30351,	30352,	30353,	30354,	30358,	30376,	30392,	30424,	30426,	30589,	30590,	30591,	30613,	30614,	30615,	30616,	30617,	30619,	30627,	30628,	30647,	30954,	30958,	30985,	30986,	31316,	31317,	31331,	31334,	31336,	31357,	31358,	31359,	31360,	31497,	31501,	31502,	31503,	31504,	31526,	31527,	31528,	31882,	31883,	31884,	31890,	31891,	31892,	31893,	31894,	
31929,	31966,	31970,	32153,	32498,	32520,	32583,	32618,	32683,	32769,	32780,	32788,	32847,	32848,	32857,	32872,	33058,	33148,	33153,	33255,	33275,	33279,	33300,	33513,	33519,	33520,	33521,	33522,	33524,	33525,	33527,	33528,	33534,	33578,	33579,	33580,	33581,	33582,	33584,	33585,	33586,	33587,	33589,	33591,	33593,	33594,	33599,	33600,	33602,	33619,	33634,	33655,	33753,	33845,	33846,	33866,	33868,	33869,	33871,	33873,	33883,	33888,	33890,	33891,	33907,	33926,	33931,	33933,	33934,	33936,	33972,	33973,	33978,	33987,	33988,	33989,	33990,	33991,	33992,	33993,	33997,	33998,	34000,	34001,	34007,	34015,	34050,	34058,	34081,	34082,	34085,	34086,	34089,	34091,	34092,	34095,	34260,	34265,	34293,	34294,	34295,	34296,	34297,	34309,	34315,	34316,	34320,	34346,	34399,	34419,	34461,	34462,	34463,	34464,	34465,	34469,	34503,	34527,	34590,	34816,	34827,	34845,	34846,	34849,	34852,	34853,	34863,	34941,	34971,	35015,	35020,	35134,	35136,	35144,	35156,	35206,	35221,	35264,	35285,	35292,	35294,	35295,	35296,	35299,	35300,	35301,	35309,	35311,	35315,	35321,	35323,	35324,	35328,	35329,	35330,	35331,	35332,	35342,	35343,	35347,	35351,	35356,	35357,	35386,	35415,	35428,	35440,	35459,	35467,	35471,	35474,	35529,	35562,	35575,	35634,	35637,	35646,	35655,	35663,	35691,	35704,	35732,	35733,	35744,	35835,	35853,	35881,	35884,	35887,	35889,	35893,	35894,	35896,	35897,	35898,	35899,	35900,	35901,	35902,	35907,	35909,	35910,	35917,	35918,	35920,	35921,	35923,	35926,	35928,	35929,	35930,	35939,	35941,	35943,	35944,	35948,	35949,	35950,	35951,	35953,	35954,	35957,	35979,	35997,	35998,	36000,	36018,	36021,	36023,	36089,	36093,	36098,	36099,	36102,	36105,	36111,	36136,	36154,	36172,	36173,	36175,	36193,	36200,	36210,	36223,	36225,	36226,	36229,	36230,	36233,	36239,	36240,	36241,	36242,	36244,	36246,	36247,	36248,	36249,	36258,	36264,	36267,	36269,	36370,	36433,	36437,	36469,	36479,	36480,	36481,	36504,	36515,	36520,	36521,	36529,	36530,	36550,	36584,	36599,	36600,	36608,	36614,	36666,	36674,	36685,	
36707,	36717,	36736,	36743,	36756,	36760,	36775,	36784,	36785,	36787,	36804,	36830,	36843,	36844,	36850,	36854,	36860,	36870,	36874,	36875,	36876,	36877,	36879,	36952,	36958,	36979,	36980,	36991,	36996,	37050,	37051,	37058,	37092,	37093,	37111,	37117,	37120,	37123,	37137,	37142,	37147,	37148,	37149,	37150,	37151,	37152,	37170,	37176,	37187,	37190,	37192,	37193,	37198,	37201,	37205,	37209,	37217,	37221,	37226,	37227,	37231,	37242,	37244,	37255,	37266,	37319,	37324,	37352,	37365,	37375,	37415,	37429,	37448,	37450,	37452,	37495,	37518,	37519,	37569,	37570,	37572,	37573,	37576,	37597,	37608,	37627,	37676,	37677,	37735,	37743,	37748,	37749,	37750,	37751,	37756,	37758,	37766,	37767,	37792,	37801,	37805,	37807,	37808,	37812,	37828,	37834,	37835,	37838,	37840,	37841,	37842,	37843,	37844,	37845,	37846,	37849,	37850,	37852,	37854,	37863,	37866,	37873,	37877,	37880,	37881,	37883,	37897,	37900,	37908,	37927,	37996,	38008,	38081,	38085,	38091,	38092,	38161,	38183,	38187,	38195,	38200,	38282,	38292,	38300,	38302,	38303,	38309,	38314,	38316,	38317,	38321,	38360,	38368,	38374,	38382,	38398,	38399,	38402,	38403,	38410,	38411,	38420,	38429,	38431,	38439,	38452,	38464,	38467,	38483,	38499,	38500,	38514,	38515,	38530,	38533,	38547,	38548,	38556,	38558,	38559,	38560,	38561,	38563,	38564,	38565,	38566,	38567,	38568,	38569,	38571,	38574,	38575,	38578,	38619,	38635);'
    sql = 'select id from resolved_papers where downloaded = 1 and npages >= 5 and pdf2text = 1 and english = 0 and id in (12,	70,	74,	77,	92,	108,	110,	111,	113,	127,	128,	129,	133,	136,	145,	149,	151,	189,	210,	223,	238,	247,	253,	276,	287,	289,	291,	292,	303,	308,	345,	346,	347,	349,	350,	351,	354,	355,	359,	360,	361,	362,	363,	364,	365,	368,	377,	381,	389,	393,	395,	406,	414,	424,	439,	446,	448,	549,	554,	558,	574,	577,	578,	579,	581,	582,	583,	585,	588,	589,	591,	592,	595,	597,	601,	604,	605,	609,	613,	621,	625,	682,	684,	712,	713,	714,	715,	716,	717,	719,	722,	723,	724,	726,	730,	731,	732,	734,	735,	738,	739,	740,	743,	749,	751,	752,	753,	754,	755,	758,	765,	782,	787,	816,	822,	830,	836,	851,	857,	860,	861,	869,	882,	970,	1044,	1045,	1047,	1050,	1052,	1055,	1056,	1057,	1058,	1060,	1061,	1062,	1063,	1064,	1065,	1066,	1068,	1069,	1072,	1073,	1074,	1075,	1076,	1079,	1080,	1083,	1084,	1086,	1087,	1089,	1094,	1100,	1104,	1105,	1106,	1115,	1116,	1117,	1122,	1124,	1125,	1126,	1131,	1133,	1142,	1143,	1146,	1150,	1151,	1172,	1174,	1176,	1184,	1194,	1248,	1283,	1301,	1307,	1309,	1367,	1381,	1417,	1419,	1452,	1456,	1482,	1491,	1507,	1511,	1513,	1522,	1542,	1562,	1585,	1587,	1591,	1624,	1626,	1628,	1652,	1687,	1688,	1689,	1692,	1693,	1694,	1696,	1698,	1699,	1701,	1704,	1710,	1711,	1714,	1716,	1719,	1720,	1727,	1728,	1730,	1745,	1750,	1751,	1755,	1757,	1770,	1809,	1815,	1820,	1831,	1835,	1872,	1884,	1887,	1898,	1935,	1955,	1993,	2009,	2025,	2026,	2029,	2030,	2031,	2199,	2241,	2244,	2246,	2275,	2276,	2277,	2278,	2279,	2305,	2323,	2324,	2325,	2327,	2328,	2347,	2360,	2402,	2404,	2410,	2415,	2442,	2448,	2450,	2451,	2452,	2461,	2462,	2467,	2477,	2509,	2510,	2512,	2513,	2518,	2522,	2524,	2531,	2543,	2547,	2554,	2555,	2576,	2577,	2578,	2579,	2580,	2583,	2586,	2605,	2609,	2624,	2629,	2646,	2651,	2652,	2653,	2655,	2656,	2659,	2661,	2662,	2671,	2676,	2677,	2756,	2757,	2758,	2760,	2761,	2762,	2768,	2771,	2772,	2773,	2774,	2776,	2777,	2781,	2782,	2783,	2786,	2789,	2790,	2791,	2792,	
2793,	2794,	2795,	2798,	2811,	2815,	2822,	2869,	2884,	2907,	2913,	2920,	2924,	3029,	3127,	3141,	3146,	3172,	3173,	3174,	3175,	3176,	3177,	3178,	3180,	3182,	3183,	3184,	3185,	3189,	3192,	3194,	3198,	3199,	3202,	3203,	3207,	3208,	3211,	3223,	3224,	3230,	3236,	3252,	3253,	3262,	3275,	3302,	3305,	3316,	3365,	3388,	3389,	3391,	3392,	3396,	3397,	3398,	3399,	3400,	3401,	3402,	3405,	3406,	3408,	3409,	3412,	3415,	3416,	3418,	3419,	3420,	3421,	3422,	3423,	3424,	3425,	3426,	3427,	3428,	3431,	3432,	3433,	3436,	3438,	3439,	3443,	3444,	3445,	3446,	3450,	3452,	3455,	3456,	3458,	3461,	3466,	3467,	3470,	3503,	3526,	3532,	3536,	3538,	3541,	3542,	3543,	3549,	3563,	3573,	3597,	3598,	3620,	3626,	3662,	3819,	3921,	3922,	3923,	3925,	3927,	3931,	3932,	3933,	3934,	3935,	3936,	3937,	3938,	3939,	3940,	3942,	3943,	3944,	3945,	3948,	3950,	3952,	3953,	3954,	3955,	3957,	3958,	3959,	3960,	3961,	3963,	3965,	3966,	3967,	3968,	3971,	3972,	3980,	3988,	3995,	4000,	4005,	4011,	4039,	4043,	4046,	4048,	4050,	4059,	4077,	4086,	4089,	4098,	4101,	4104,	4109,	4111,	4123,	4127,	4170,	4184,	4203,	4215,	4221,	4235,	4287,	4295,	4345,	4362,	4367,	4448,	4449,	4451,	4452,	4453,	4454,	4455,	4457,	4458,	4459,	4460,	4461,	4462,	4463,	4464,	4465,	4466,	4467,	4468,	4469,	4470,	4472,	4478,	4480,	4481,	4482,	4496,	4500,	4504,	4508,	4513,	4518,	4523,	4524,	4548,	4551,	4567,	4572,	4598,	4607,	4608,	4611,	4657,	4786,	4788,	4789,	4791,	4792,	4793,	4794,	4795,	4796,	4797,	4798,	4799,	4804,	4805,	4811,	4815,	4817,	4819,	4829,	4839,	4840,	5037,	5038,	5040,	5047,	5179,	5191,	5192,	5210,	5248,	5249,	5266,	5275,	5276,	5322,	5323,	5327,	5330,	5362,	5410,	5411,	5416,	5451,	5462,	5493,	5494,	5496,	5519,	5536,	5548,	5555,	5587,	5588,	5589,	5590,	5591,	5594,	5599,	5617,	5633,	5636,	5660,	5667,	5695,	5697,	5701,	5702,	5706,	5767,	5768,	5769,	5773,	5778,	5786,	5831,	5832,	5833,	5835,	5836,	5837,	5839,	5844,	5849,	5850,	5858,	5860,	5889,	5901,	5915,	5916,	5918,	5920,	5991,	5992,	5993,	5994,	5995,	6009,	6045,	6079,	6080,	6081,	6083,	6084,	
6085,	6086,	6087,	6100,	6101,	6107,	6185,	6249,	6278,	6279,	6280,	6281,	6282,	6283,	6285,	6305,	6306,	6387,	6393,	6396,	6397,	6398,	6411,	6439,	6498,	6505,	6511,	6513,	6518,	6520,	6524,	6525,	6526,	6527,	6532,	6543,	6553,	6555,	6565,	6566,	6569,	6573,	6574,	6581,	6585,	6601,	6605,	6606,	6612,	6615,	6617,	6621,	6645,	6646,	6648,	6651,	6652,	6658,	6660,	6667,	6672,	6676,	6682,	6684,	6688,	6690,	6692,	6693,	6700,	6704,	6743,	6769,	6771,	6772,	6775,	6778,	6783,	6785,	6789,	6793,	6818,	6824,	6829,	6830,	6834,	6839,	6845,	6846,	6849,	6850,	6855,	6859,	6866,	6873,	6878,	6887,	6888,	6889,	6890,	6907,	6926,	6945,	6948,	6954,	6963,	7006,	7066,	7082,	7102,	7121,	7162,	7163,	7271,	7272,	7273,	7285,	7314,	7315,	7350,	7362,	7364,	7398,	7441,	7442,	7443,	7444,	7446,	7451,	7454,	7456,	7462,	7464,	7504,	7515,	7516,	7547,	7548,	7634,	7659,	7660,	7661,	7662,	7663,	7664,	7665,	7672,	7776,	7777,	7783,	7784,	7788,	7789,	7792,	7795,	7797,	7798,	7799,	7809,	7831,	7889,	7917,	7918,	7920,	7926,	7930,	7932,	7933,	7935,	7936,	7941,	7944,	7960,	7962,	7971,	8008,	8017,	8070,	8075,	8076,	8110,	8111,	8112,	8117,	8120,	8128,	8129,	8130,	8133,	8136,	8140,	8143,	8144,	8145,	8148,	8149,	8150,	8153,	8154,	8159,	8163,	8203,	8225,	8268,	8270,	8302,	8310,	8312,	8419,	8421,	8496,	8497,	8498,	8500,	8505,	8506,	8507,	8508,	8510,	8513,	8517,	8533,	8543,	8584,	8710,	8717,	8718,	8719,	8720,	8721,	8722,	8724,	8726,	8730,	8732,	8733,	8734,	8737,	8739,	8740,	8741,	8742,	8743,	8744,	8745,	8747,	8748,	8750,	8751,	8752,	8753,	8754,	8755,	8756,	8757,	8759,	8761,	8764,	8766,	8768,	8769,	8773,	8774,	8775,	8784,	8811,	8817,	9042,	9056,	9207,	9219,	9240,	9249,	9273,	9318,	9322,	9422,	9457,	9485,	9562,	9623,	9647,	9836,	9837,	9922,	10067,	10068,	10069,	10168,	10185,	10288,	10400,	10401,	10513,	10515,	10606,	10700,	10702,	10703,	10771,	10772,	10819,	10821,	10927,	11019,	11056,	11113,	11142,	11143,	11225,	11226,	11227,	11343,	11361,	11362,	11364,	11377,	11448,	11460,	11461,	11462,	11463,	11465,	11466,	11468,	11493,	11609,	
11610,	11611,	11617,	11638,	11659,	11718,	11748,	11749,	11750,	11751,	11762,	11821,	11850,	11891,	11898,	11911,	11913,	11914,	11915,	11916,	11917,	11918,	11919,	11920,	11921,	11922,	11923,	11926,	11928,	11934,	11955,	11980,	12026,	12030,	12044,	12092,	12093,	12094,	12095,	12096,	12098,	12100,	12101,	12102,	12103,	12104,	12105,	12106,	12107,	12108,	12109,	12110,	12111,	12112,	12113,	12114,	12122,	12123,	12125,	12144,	12147,	12234,	12235,	12237,	12256,	12305,	12339,	12346,	12407,	12448,	12511,	12665,	12705,	12706,	12708,	12709,	12710,	12711,	12712,	12713,	12714,	12716,	12717,	12718,	12719,	12720,	12721,	12722,	12725,	12729,	12742,	12753,	12762,	12802,	12813,	12816,	12821,	12823,	12843,	12856,	12905,	12907,	13006,	13061,	13062,	13063,	13137,	13138,	13198,	13329,	13330,	13331,	13332,	13494,	13495,	13582,	13583,	13584,	13585,	13586,	13697,	13833,	13834,	13835,	13836,	13837,	13840,	14160,	14161,	14200,	14341,	14342,	14343,	14590,	14591,	14597,	14610,	14614,	14631,	14632,	14633,	14634,	14635,	14650,	14655,	14656,	14689,	14726,	14777,	14870,	14871,	14872,	14921,	14922,	14923,	14991,	14992,	14993,	14994,	14995,	15136,	15137,	15138,	15139,	15140,	15141,	15142,	15143,	15152,	15216,	15265,	15277,	15387,	15388,	15483,	15546,	15550,	15587,	15590,	15623,	15641,	15653,	15711,	15712,	15730,	15743,	15763,	15794,	15805,	15821,	15831,	15884,	15932,	16039,	16122,	16124,	16153,	16175,	16181,	16220,	16233,	16264,	16277,	16306,	16361,	16377,	16391,	16392,	16393,	16402,	16404,	16431,	16439,	16440,	16444,	16447,	16448,	16455,	16457,	16463,	16468,	16513,	16524,	16528,	16551,	16569,	16594,	16596,	16600,	16610,	16647,	16648,	16718,	16731,	16763,	16765,	16794,	16795,	16899,	16948,	16962,	16993,	16998,	17011,	17013,	17034,	17061,	17062,	17141,	17142,	17143,	17144,	17155,	17158,	17248,	17262,	17263,	17264,	17265,	17266,	17333,	17334,	17335,	17395,	17396,	17398,	17400,	17401,	17405,	17410,	17412,	17417,	17420,	17431,	17547,	17584,	17585,	17587,	17599,	17674,	17676,	17677,	17679,	17711,	17719,	
17749,	17750,	17751,	17752,	17753,	17754,	17756,	17757,	17811,	17812,	17814,	17948,	17963,	17964,	17965,	17989,	17998,	18083,	18139,	18145,	18165,	18229,	18230,	18257,	18264,	18273,	18321,	18322,	18323,	18351,	18515,	18548,	18599,	18600,	18623,	18637,	18675,	18676,	18687,	18698,	18736,	18753,	18768,	18792,	18794,	18797,	18823,	18828,	18830,	18850,	18851,	18853,	18854,	18857,	18882,	18885,	18886,	18887,	18888,	18891,	18892,	18893,	18894,	18898,	18901,	18904,	18930,	18947,	18967,	18968,	18970,	18972,	18973,	18974,	18976,	18977,	18980,	18982,	18983,	18984,	18985,	18986,	18991,	19006,	19059,	19060,	19061,	19062,	19064,	19066,	19067,	19069,	19071,	19103,	19104,	19110,	19116,	19153,	19180,	19181,	19186,	19263,	19272,	19273,	19280,	19318,	19409,	19425,	19428,	19456,	19528,	19531,	19538,	19606,	19607,	19609,	19610,	19612,	19613,	19616,	19623,	19636,	19647,	19648,	19685,	19798,	19799,	19800,	19801,	19802,	19805,	19806,	19807,	19808,	19811,	19812,	19813,	19816,	19820,	19821,	19836,	19874,	19875,	19878,	19960,	19985,	20051,	20052,	20053,	20054,	20055,	20056,	20057,	20058,	20059,	20061,	20062,	20063,	20064,	20065,	20066,	20069,	20070,	20071,	20072,	20074,	20078,	20079,	20081,	20084,	20088,	20090,	20110,	20156,	20157,	20168,	20189,	20193,	20245,	20344,	20345,	20346,	20347,	20348,	20349,	20350,	20353,	20354,	20355,	20356,	20357,	20358,	20359,	20360,	20361,	20362,	20363,	20365,	20368,	20370,	20371,	20373,	20374,	20377,	20391,	20392,	20396,	20398,	20400,	20444,	20476,	20520,	20682,	20685,	20687,	20688,	20689,	20690,	20691,	20692,	20693,	20694,	20695,	20698,	20699,	20700,	20701,	20702,	20703,	20707,	20709,	20714,	20728,	20760,	20774,	20864,	20865,	20866,	20867,	20868,	20869,	20870,	20872,	20874,	20899,	20909,	20962,	21041,	21042,	21117,	21118,	21121,	21139,	21146,	21227,	21271,	21272,	21273,	21274,	21275,	21425,	21430,	21493,	21505,	21507,	21510,	21513,	21612,	21616,	21621,	21622,	21623,	21624,	21667,	21675,	21751,	21765,	21766,	21767,	21846,	21847,	21856,	21857,	21858,	21871,	
21872,	21873,	21875,	21876,	21877,	21881,	21883,	21885,	21924,	21925,	21957,	21977,	21978,	21979,	21980,	21984,	21985,	21993,	21997,	21999,	22001,	22031,	22033,	22082,	22113,	22175,	22228,	22247,	22271,	22272,	22371,	22374,	22462,	22463,	22613,	22694,	22695,	22696,	22697,	22700,	22880,	22881,	22882,	22883,	22884,	22901,	22977,	22978,	22979,	22981,	23030,	23032,	23191,	23230,	23236,	23238,	23291,	23340,	23453,	23552,	23553,	23744,	23761,	23774,	24016,	24025,	24037,	24085,	24090,	24096,	24125,	24126,	24128,	24129,	24130,	24132,	24133,	24140,	24141,	24142,	24145,	24150,	24151,	24152,	24153,	24155,	24168,	24169,	24170,	24171,	24172,	24173,	24174,	24181,	24186,	24187,	24189,	24190,	24192,	24193,	24206,	24207,	24208,	24209,	24210,	24211,	24212,	24213,	24214,	24239,	24243,	24244,	24246,	24247,	24249,	24250,	24251,	24252,	24253,	24254,	24255,	24256,	24257,	24258,	24261,	24290,	24297,	24298,	24299,	24300,	24301,	24302,	24303,	24304,	24305,	24307,	24308,	24315,	24326,	24330,	24334,	24335,	24336,	24350,	24364,	24365,	24366,	24367,	24368,	24371,	24372,	24390,	24391,	24393,	24405,	24406,	24408,	24411,	24412,	24413,	24415,	24438,	24439,	24440,	24473,	24474,	24476,	24477,	24478,	24479,	24480,	24481,	24483,	24484,	24485,	24486,	24487,	24520,	24522,	24523,	24524,	24525,	24526,	24527,	24528,	24529,	24530,	24531,	24532,	24533,	24535,	24536,	24537,	24540,	24541,	24542,	24543,	24544,	24545,	24546,	24547,	24549,	24550,	24576,	24586,	24621,	24622,	24623,	24624,	24625,	24626,	24627,	24628,	24629,	24630,	24631,	24632,	24633,	24634,	24635,	24636,	24637,	24638,	24639,	24640,	24641,	24642,	24644,	24645,	24646,	24647,	24648,	24651,	24652,	24653,	24654,	24655,	24656,	24657,	24712,	24713,	24714,	24715,	24716,	24717,	24719,	24720,	24721,	24722,	24723,	24724,	24731,	24775,	24795,	24812,	24831,	24833,	24835,	24836,	24845,	24846,	24851,	24869,	24877,	24888,	24889,	24907,	24926,	24952,	25091,	25169,	25177,	25178,	25195,	25206,	25247,	25248,	25251,	25267,	25340,	25345,	25455,	25456,	25460,	25464,	
25754,	25822,	25845,	25865,	25890,	25891,	25893,	25914,	25975,	25976,	25978,	25980,	25982,	25986,	25996,	26003,	26074,	26112,	26143,	26172,	26182,	26183,	26186,	26194,	26202,	26283,	26284,	26287,	26289,	26293,	26303,	26316,	26320,	26322,	26463,	26465,	26467,	26469,	26476,	26481,	26486,	26489,	26497,	26596,	26663,	26678,	26717,	27136,	27183,	27307,	27340,	27341,	27342,	27344,	27348,	27355,	27607,	27608,	27609,	27610,	27623,	27635,	27641,	27922,	27937,	28165,	28263,	28277,	28422,	28433,	28437,	28508,	28738,	28739,	28740,	28743,	28748,	28820,	28990,	28993,	28997,	29008,	29009,	29010,	29011,	29079,	29084,	29090,	29093,	29101,	29102,	29104,	29105,	29106,	29112,	29113,	29114,	29119,	29120,	29122,	29123,	29124,	29125,	29129,	29130,	29133,	29134,	29135,	29137,	29139,	29146,	29147,	29172,	29174,	29176,	29184,	29191,	29192,	29194,	29200,	29201,	29203,	29221,	29224,	29225,	29226,	29232,	29234,	29258,	29265,	29268,	29273,	29274,	29275,	29276,	29277,	29278,	29280,	29281,	29282,	29300,	29301,	29302,	29310,	29313,	29314,	29315,	29316,	29320,	29382,	29435,	29436,	29454,	29457,	29458,	29468,	29469,	29470,	29473,	29475,	29476,	29477,	29481,	29482,	29483,	29485,	29500,	29501,	29503,	29504,	29505,	29508,	29513,	29515,	29524,	29532,	29533,	29534,	29535,	29537,	29549,	29553,	29556,	29561,	29574,	29618,	29634,	29635,	29637,	29639,	29665,	29666,	29668,	29669,	29672,	29682,	29693,	29709,	29710,	29711,	29717,	29741,	29742,	29746,	29747,	29752,	29753,	29755,	29756,	29759,	29804,	29805,	29832,	29998,	30003,	30005,	30006,	30007,	30009,	30019,	30025,	30040,	30074,	30075,	30077,	30078,	30080,	30082,	30083,	30084,	30290,	30291,	30293,	30349,	30350,	30351,	30352,	30353,	30354,	30358,	30376,	30392,	30424,	30426,	30589,	30590,	30591,	30613,	30614,	30615,	30616,	30617,	30619,	30627,	30628,	30647,	30954,	30958,	30985,	30986,	31316,	31317,	31331,	31334,	31336,	31357,	31358,	31359,	31360,	31497,	31501,	31502,	31503,	31504,	31526,	31527,	31528,	31882,	31883,	31884,	31890,	31891,	31892,	31893,	31894,	
31929,	31966,	31970,	32153,	32498,	32520,	32583,	32618,	32683,	32769,	32780,	32788,	32847,	32848,	32857,	32872,	33058,	33148,	33153,	33255,	33275,	33279,	33300,	33513,	33519,	33520,	33521,	33522,	33524,	33525,	33527,	33528,	33534,	33578,	33579,	33580,	33581,	33582,	33584,	33585,	33586,	33587,	33589,	33591,	33593,	33594,	33599,	33600,	33602,	33619,	33634,	33655,	33753,	33845,	33846,	33866,	33868,	33869,	33871,	33873,	33883,	33888,	33890,	33891,	33907,	33926,	33931,	33933,	33934,	33936,	33972,	33973,	33978,	33987,	33988,	33989,	33990,	33991,	33992,	33993,	33997,	33998,	34000,	34001,	34007,	34015,	34050,	34058,	34081,	34082,	34085,	34086,	34089,	34091,	34092,	34095,	34260,	34265,	34293,	34294,	34295,	34296,	34297,	34309,	34315,	34316,	34320,	34346,	34399,	34419,	34461,	34462,	34463,	34464,	34465,	34469,	34503,	34527,	34590,	34816,	34827,	34845,	34846,	34849,	34852,	34853,	34863,	34941,	34971,	35015,	35020,	35134,	35136,	35144,	35156,	35206,	35221,	35264,	35285,	35292,	35294,	35295,	35296,	35299,	35300,	35301,	35309,	35311,	35315,	35321,	35323,	35324,	35328,	35329,	35330,	35331,	35332,	35342,	35343,	35347,	35351,	35356,	35357,	35386,	35415,	35428,	35440,	35459,	35467,	35471,	35474,	35529,	35562,	35575,	35634,	35637,	35646,	35655,	35663,	35691,	35704,	35732,	35733,	35744,	35835,	35853,	35881,	35884,	35887,	35889,	35893,	35894,	35896,	35897,	35898,	35899,	35900,	35901,	35902,	35907,	35909,	35910,	35917,	35918,	35920,	35921,	35923,	35926,	35928,	35929,	35930,	35939,	35941,	35943,	35944,	35948,	35949,	35950,	35951,	35953,	35954,	35957,	35979,	35997,	35998,	36000,	36018,	36021,	36023,	36089,	36093,	36098,	36099,	36102,	36105,	36111,	36136,	36154,	36172,	36173,	36175,	36193,	36200,	36210,	36223,	36225,	36226,	36229,	36230,	36233,	36239,	36240,	36241,	36242,	36244,	36246,	36247,	36248,	36249,	36258,	36264,	36267,	36269,	36370,	36433,	36437,	36469,	36479,	36480,	36481,	36504,	36515,	36520,	36521,	36529,	36530,	36550,	36584,	36599,	36600,	36608,	36614,	36666,	36674,	36685,	
36707,	36717,	36736,	36743,	36756,	36760,	36775,	36784,	36785,	36787,	36804,	36830,	36843,	36844,	36850,	36854,	36860,	36870,	36874,	36875,	36876,	36877,	36879,	36952,	36958,	36979,	36980,	36991,	36996,	37050,	37051,	37058,	37092,	37093,	37111,	37117,	37120,	37123,	37137,	37142,	37147,	37148,	37149,	37150,	37151,	37152,	37170,	37176,	37187,	37190,	37192,	37193,	37198,	37201,	37205,	37209,	37217,	37221,	37226,	37227,	37231,	37242,	37244,	37255,	37266,	37319,	37324,	37352,	37365,	37375,	37415,	37429,	37448,	37450,	37452,	37495,	37518,	37519,	37569,	37570,	37572,	37573,	37576,	37597,	37608,	37627,	37676,	37677,	37735,	37743,	37748,	37749,	37750,	37751,	37756,	37758,	37766,	37767,	37792,	37801,	37805,	37807,	37808,	37812,	37828,	37834,	37835,	37838,	37840,	37841,	37842,	37843,	37844,	37845,	37846,	37849,	37850,	37852,	37854,	37863,	37866,	37873,	37877,	37880,	37881,	37883,	37897,	37900,	37908,	37927,	37996,	38008,	38081,	38085,	38091,	38092,	38161,	38183,	38187,	38195,	38200,	38282,	38292,	38300,	38302,	38303,	38309,	38314,	38316,	38317,	38321,	38360,	38368,	38374,	38382,	38398,	38399,	38402,	38403,	38410,	38411,	38420,	38429,	38431,	38439,	38452,	38464,	38467,	38483,	38499,	38500,	38514,	38515,	38530,	38533,	38547,	38548,	38556,	38558,	38559,	38560,	38561,	38563,	38564,	38565,	38566,	38567,	38568,	38569,	38571,	38574,	38575,	38578,	38619,	38635);'
    print(sql)
    papers = pd.read_sql(sql, con=db)

    for index, row in papers.iterrows():

        lang = None
        id = row[0]
        english = 0
        other = 0
        text = ""
        res = ""
        print(id)
        if id:

            # with open(os.path.join('data/txt', str(id) + '.txt')) as infile:
            with open(
                    os.path.join(
                        '/Volumes/SeagateBackupPlusDrive/CLPD2019_FULL/txt',
                        str(id) + '.txt')) as infile:
                for line in infile:
                    if not re.match(r'^\s*$', line):
                        line = re.sub(r"-\n", "", line)
                        line = re.sub(r"\n", " ", line)
                        text += line
                infile.close()
            lenText = len(text)

            nrequest = round(float(lenText) / 5000)
            count = 1
            while count <= nrequest:
                res = ''
                content = ""

                posIni = (count * 5000) - 5000
                posFin = (count * 5000) - 1

                content += text[posIni:posFin]
                try:
                    translator = Translator(random.choice(key_choices))
                    res = translator.detect_lang([content])

                except:
                    pass
                if res:
                    if res == 'en':
                        english += 1
                    else:
                        other += 1
                count += 1
            if english > other:
                lang = "English"
                sql = "update resolved_papers set english = 1 where id = %s" % (
                    id)
            else:
                lang = "Other"
            try:
                cur.execute(sql)
                db.commit()
            except:
                db.rollback()
        print("Id: %s. Language: %s" % (id, lang))
    print("Done!")
Exemplo n.º 27
0
class TranslatorTestCase(unittest.TestCase):
    """Exercise the Translator client API end to end.

    Every test except ``test_add_translation`` goes through ``self.translator``;
    ``test_add_translation`` uses ``self.translator_mock`` and inspects the
    value it returns.
    """

    def setUp(self):
        # Fresh client and mock for every test, built from the same credentials.
        self.translator = Translator(client_id, client_secret)
        self.translator_mock = TranslatorMock(client_id, client_secret)

    def test_translate(self):
        translated = self.translator.translate('world', 'en', 'ru')
        self.assertEqual('мир', translated)

    def test_translate_array(self):
        results = self.translator.translate_array(['hello', 'world'], 'en', 'ru')
        texts = [item['TranslatedText'] for item in results]
        self.assertEqual(['Привет', 'мир'], texts)

    def test_translate_array2(self):
        phrases = ['hello', 'world', 'Hello. How are you?']
        results = self.translator.translate_array2(phrases, 'en', 'ru')
        # Check the translated text first, then the alignment strings.
        texts = [item['TranslatedText'] for item in results]
        self.assertEqual(['Привет', 'мир', 'Привет. Как ваши дела?'], texts)
        aligned = [item['Alignment'] for item in results]
        self.assertEqual(['0:4-0:5', '0:4-0:2', '0:5-0:6 7:18-8:21'], aligned)

    def test_get_translations(self):
        payload = self.translator.get_translations('world', 'en', 'ru')
        self.assertIsInstance(payload, dict)
        self.assertIn('Translations', payload)

    def test_break_sentences(self):
        sentences = self.translator.break_sentences('Hello. How are you?', 'en')
        self.assertEqual(['Hello. ', 'How are you?'], sentences)

    def test_add_translation(self):
        # The mock returns a value that is checked for the query parameters.
        request_url = self.translator_mock.add_translation(
            'orig', 'trans', 'en', 'ru', user='******')
        self.assertIn('originalText=orig', request_url)
        self.assertIn('translatedText=trans', request_url)

    def test_get_langs(self):
        codes = self.translator.get_langs()
        self.assertIsInstance(codes, list)
        self.assertIn('en', codes)

    def test_get_lang_names(self):
        names = self.translator.get_lang_names(['ru', 'en'], 'en')
        self.assertEqual(['Russian', 'English'], names)

    def test_get_speackable_langs(self):
        codes = self.translator.get_langs(speakable=True)
        self.assertIsInstance(codes, list)
        self.assertIn('en-us', codes)

    def test_detect_lang(self):
        detected = self.translator.detect_lang('Hello')
        self.assertEqual('en', detected)

    def test_detect_langs(self):
        detected = self.translator.detect_langs(['Hello', 'Привет'])
        self.assertEqual(['en', 'ru'], detected)

    def test_speak(self):
        audio = self.translator.speak('Hello', 'en')
        self.assertIsNotNone(audio)

    def test_speak_to_file(self):
        buf = StringIO()
        self.translator.speak_to_file(buf, 'Hello', 'en')
        # Rewind so the read below sees what speak_to_file wrote.
        buf.seek(0)
        written = buf.read()
        self.assertTrue(len(written) > 0)
Exemplo n.º 28
0
# -*- coding: utf-8 -*-
from langdetect import detect
from mongodb.connection import getMongoConnection
import config
import sys
from mstranslator import Translator
# Shared Microsoft Translator client used by translate() below.
# NOTE(review): the subscription key is hard-coded here; prefer loading it
# from configuration so it is not committed to source control.
translator = Translator('a4462ffdf09e4ce08bb2df759478229a')


def translate(company_name):
    conn = getMongoConnection()
    db = conn[config.Mongo_DB_NAME]
    while db[company_name].find({"translated": "false"}).count() > 0:
        job = db[company_name].find_one({"translated": "false"})
        try:
            lan = detect(unicode(job['job_summary'], "utf-8"))
        except:
            lan = detect(job['job_summary'])
        print lan
        print job
        if str(lan) != 'en':
            for key in job:
                if key == "city" or key == "snippet":
                    job[key] = translator.translate(job[key], lang_to="en")
                    db[company_name].update({"url": job["url"]},
                                            {"$set": {
                                                key: job[key]
                                            }},
                                            upsert=False,
                                            multi=False)
                if key == "job_summary":
Exemplo n.º 29
0
 def setUp(self):
     """Create the Translator and TranslatorMock fixtures from SUBSCRIPTION_KEY."""
     key = SUBSCRIPTION_KEY
     self.translator = Translator(key)
     self.translator_mock = TranslatorMock(key)
Exemplo n.º 30
0
import os
from string import join
import time
import random
from slackclient import SlackClient
from mstranslator import Translator
import config
import alify

# Initialize the Microsoft Translator client with the key from config.
translator = Translator(config.MSTRANS_ID)

# Instantiate the Slack client and set the bot's ID and display name.
# Two workspace configurations exist; only the TransBot one is active.
######### STARTERBOT / BRUNDRETTFAMILY SLACK (disabled; read from environment variables)
#slack_client = SlackClient(os.environ.get('SLACK_BOT_TOKEN')) #brundrettfamily starterbot
#BOT_ID = os.environ.get("BOT_ID") #starterbot brundrettfamily
#BOT_NAME = "StarterBot"

######### TRANSBOT / PIZZABALLS SLACK (active; values come from the config module)
slack_client = SlackClient(config.TRANS_BOT_TOKEN)  # pizzaballs transbot
BOT_ID = config.TRANS_BOT_ID  # transbot / pizzaballs
BOT_NAME = "TransBot"

# Constants.
# AT_BOT is the "<@BOT_ID>" string built from the bot's ID.
AT_BOT = "<@" + BOT_ID + ">"
EXAMPLE_COMMAND = "do"
TRANS_COMMAND = "nazify"


def handle_trans_cmd(command, channel):
Exemplo n.º 31
0
# -*- coding: utf-8 -*-
from mstranslator import Translator

# Shared Microsoft Translator client used by translate() below.
# NOTE(review): the client id and secret are hard-coded here; prefer loading
# them from configuration so they are not committed to source control.
translator = Translator('hswn_client_id',
                        'mLmqE4kYtcVddhVG2Vq2MLsSTDbhKCmuxqfsrU7Lj7M=')


def translate(word):
    """Return ``word`` translated from Hindi ('hi') to English ('en').

    Delegates to the module-level ``translator`` client.
    """
    english = translator.translate(word, lang_from='hi', lang_to='en')
    return english
Exemplo n.º 32
0
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` from ``langfrom`` to ``langto``.

    A new Translator is created on every call from ``client_id`` and
    ``client_secret``, which are resolved outside this function.

    Returns the translated text encoded as UTF-8.
    """
    client = Translator(client_id, client_secret)
    translated = client.translate(
        text_message,
        lang_from=langfrom,
        lang_to=langto)
    return translated.encode("utf8")
Exemplo n.º 33
0
from mstranslator import Translator

# Shared Microsoft Translator client used by trans() below.
# NOTE(review): 'your_own_access_key' looks like a placeholder; presumably it
# must be replaced with a real subscription key before this will work.
translator = Translator('your_own_access_key')


def trans(word):
    """Return ``word`` translated from English ('en') to Polish ('pl').

    Delegates to the module-level ``translator`` client.
    """
    polish = translator.translate(word, lang_from='en', lang_to='pl')
    return polish
Exemplo n.º 34
0
    https://textblob.readthedocs.io/en/dev/
    https://pypi.python.org/pypi/mstranslator
"""
# Twitter API credentials (consumer key pair and access token pair).
# NOTE(review): these secrets are hard-coded in source; prefer loading them
# from configuration or the environment.
consumer_key = 'GW8wstK6iPAn4UY4FgnxfTd2h'
consumer_secret = '6pIpAy32sOAATULfZqQWrXbe5sbXaQdUofPys1mSVlEMkIzKAa'

access_token = '104984260-whCiE24vIwPgnx8Ym41LxEy8dldRRNFu8fLbiGzs'
access_token_secret = 'oanaiTCnCPCnh1rHkMT7FxHAUeuT5NEnQZTGGnrITYmTd'

# Microsoft DataMarket credentials for translation - limited plan of
# 2 million characters per month.
# NOTE(review): hard-coded secret, same concern as the Twitter credentials.
client_id = 'bruno_sentiment_analysis'
client_secret = 'Kd/gHVZgXTkXoAEGaK/or71IrEQr1DmG98q8eLQmaGU='

# Microsoft Translator client built from the credentials above.
translator = Translator(client_id, client_secret)

# Source and target language codes used for translation.
LANG_FROM = 'pt'
LANG_TO = 'en'

# Authenticate against Twitter: OAuth handler first, then the access token.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Fully authenticated API handle.
api = tweepy.API(auth)

# Search Twitter for SEARCH_TERM at import time and keep the results.
SEARCH_TERM = 'unisul'
public_tweets = api.search(SEARCH_TERM)
Exemplo n.º 35
0
#-*- coding: utf-8 -*-

from mstranslator import Translator
from dateTime import getCurrentDate
from dateTime import getCurrentTime

import sys
import goslate
import json
import MySQLdb
# Python 2 idiom: reload sys so that setdefaultencoding can be called, then
# make UTF-8 the default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')

# Credentials and client for Microsoft Translator.
# NOTE(review): the client secret is hard-coded in source; prefer loading it
# from configuration or the environment.
CLIENT_ID = "translatelate"
CLIENT_SECRET = "GK7MyfCoz1NUrNuehRZBlLWLNpHMgDaAZoT8MkQOMlI="
translator_ms = Translator(CLIENT_ID, CLIENT_SECRET)

# Language codes accepted by validateLanguage().
languages = [
    "af", "sq", "ar", "be", "bg", "ca", "zh-CN", "zh-TW", "hr",
    "cs", "da", "nl", "en", "et", "tl", "fi", "fr", "gl", "de",
    "el", "iw", "hi", "hu", "is", "id", "ga", "it", "ja", "ko",
    "lv", "lt", "mk", "ms", "mt", "no", "fa", "pl", "pt", "ro",
    "ru", "sr", "sk", "sl", "es", "sw", "sv", "th", "tr", "uk",
    "vi", "cy", "yi",
]


def validateLanguage(lang):
    """Return True when ``lang`` is one of the codes in ``languages``, else False.

    The comparison is an exact, case-sensitive membership test.
    """
    return lang in languages

def saveData(host, user, passwd, db, result, original, from_lang, to_lang, tableName):
    dBase = MySQLdb.connect(host, user, passwd, db, charset='utf8', use_unicode=True)
Exemplo n.º 36
0
# Name: pleaseTranslate (/u/pleaseTranslate)
# Author: Saroekin (/u/Saroekin)
# Version: Python 2.7.6

#Files or importations that are used elsewhere in program.
import os
import praw
import time
import traceback
import externals_pleaseTranslate
from mstranslator import Translator

# Build the Microsoft Translator client from the credentials kept in the
# externals_pleaseTranslate module.
translator = Translator(externals_pleaseTranslate.client_ID,
                        externals_pleaseTranslate.client_secret)

# Reddit account username and password, also taken from the externals module.
Username = externals_pleaseTranslate.username
Password = externals_pleaseTranslate.password

# User agent string that reddit sees on the bot's requests; create the praw
# client with it and log in at import time.
user_agent = externals_pleaseTranslate.user_agent
r = praw.Reddit(user_agent=user_agent)
print("\n\nLogging in. . .\n\n")
r.login(Username, Password)

# Language tables used by the program, re-exported from the externals module.
fullLangNames = externals_pleaseTranslate.fullLangNames
abbrevLangNames = externals_pleaseTranslate.abbrevLangNames
supportedLangs = externals_pleaseTranslate.supportedLangs
Exemplo n.º 37
0
                        site_id=MySQLdb.escape_string(str(doc['site_id'])),
                        link_text=link_text,
                        ftype=MySQLdb.escape_string(str(doc['ftype'])),
                        doc_type=MySQLdb.escape_string(str(doc['doc_type'])),
                        full_url=MySQLdb.escape_string(str(doc['full_url'])))
                    cursor.execute(sql)
                    #cursor.execute('INSERT INTO docs_sites (site_id, link_text, ftype, doc_type, full_url) VAALUES ({site_id}, "{link_text}", "{ftype}", "{doc_type}", "{full_url}")'
                    #    .format(site_id=MySQLdb.escape_string(doc['site_id']), link_text=MySQLdb.escape_string(doc['link_text']), ftype=MySQLdb.escape_string(doc['ftype']),
                    #        doc_type=MySQLdb.escape_string(doc['doc_type']), full_url=MySQLdb.escape_string(doc['full_url'])))

        #------------------------------------------------------------------------------------------------------
        # Start automatic translation of the blocks, also in threads,
        # if the project has this option enabled.
        #------------------------------------------------------------------------------------------------------

        translator = Translator(trans_client, trans_secret)
        langs = getLangsProject(siteID)

        sql = 'SELECT * FROM blocks WHERE site_id = {projectID}'.format(
            projectID=siteID)

        cursor.execute(sql)
        blocks = cursor.fetchall()
        #        for lang in langs:
        #            langTo  = lang[3]
        #            langID  = lang[0]
        #            pool    = ThreadPool(2)
        #            results = pool.map(translateBlock, blocks)
        #            results = pool.map(createEmptyTranslate, blocks)
        #            pool.close()
        #            pool.join()
Exemplo n.º 38
0
#Bing translator Spanish->English
#English stopwords
#English SnowballStemmer

import sys
import re
from nltk.corpus import stopwords
from mstranslator import Translator
#English and Spanish stemmer available
from nltk.stem import snowball
import string
import regex
# English stemmer, because text is translated to English first.
stemmer = snowball.EnglishStemmer(ignore_stopwords=False)

# Shared Microsoft Translator client.
# NOTE(review): the subscription key is hard-coded here; prefer loading it
# from configuration so it is not committed to source control.
translator = Translator('60864ac93121426d8fbbb1e2581a8c3e')

# Module-level accumulators, all starting empty.
stop_words_list = []
flat_stop_words_list = []
exclusion_list_en_es = []
# 800 boolean flags, all initially False.
# NOTE(review): presumably one flag per input item - confirm where 800 comes from.
has_hashtag_or_mention = [False] * 800

# Selected punctuation characters taken from string.punctuation:
# indexes 2-5 ('#', '$', '%', '&'), index 9 ('*') and indexes 20-21 ('?', '@').
# The result mixes nested lists with a single string:
# [['#', '$', '%', '&'], '*', ['?', '@']].
# NOTE(review): confirm the nesting is intended by the code that reads it.
punctuation = []
punctuation.append(list(string.punctuation[2:6]))
punctuation.append(string.punctuation[9])
punctuation.append(list(string.punctuation[20:22]))


def make_stop_words_list():
    #exclude words which are in both dictionaries
    #bc they will be counted toward both languages and