def doTranslate(self):
        openDirName = self.getOpenFileName()
        trans = Translator(to_lang="ko")
        trans.from_lang = self.lang_code[str(self.cb_fromLang.currentText())]
        self.label.setText("Under translating")
        for langCode in self.lang_code.keys():
            try:
                writeFile = open(openDirName+"\\"+langCode+"_"+self.originalFileName,'w')
                transResult=''
                trans.to_lang = self.lang_code[langCode]
                for line in self.readText:
                    try:
                        transResult += trans.translate(line).encode('utf-8')+"\n"
                    except:
                        transResult += trans.translate(line)+'\n'
                writeFile.write(transResult)
                writeFile.close()
            except:
                print langCode
                print self.readText
                break

        self.label.setText("Working done")
Example #2
def process(source, outfile, _filter):
    if os.path.isfile(source):
        deny = []
        with open(_filter, 'r') as filter:
            for line in filter:
                deny.append(line.replace('\n', '').replace('\r', ''))
        tmp = []
        words = {}
        with open(source, 'r') as source:
            with open(outfile, 'w') as dic:
                for str in source:
                    tmp = normalize(str).split(' ')
                    for word in tmp:
                        if len(word) > 2:
                            if words.get(word):
                                words[word] += 1
                            else:
                                words[word] = 1
                # print words
                translator = Translator(to_lang="ru")
                for key, value in sorted(words.iteritems(), key=lambda (k, v): (v, k)):
                    if key not in deny:
                        dic.write(key + ' - ' + translator.translate(key).encode("utf8") + '\n')
                        # print "%s: %s" % (key, value)
    else:
        print 'Please input correct data for <source>!'
Example #3
def trans(flag,lang,word,delim):
  try:
    langNAme,langReq = word.split(delim,1)
    length=len(langReq)
    if ( length > maxLength):
      sc.api_call('chat.postMessage', 
                   username='******', 
                   icon_url=flag,  
                   as_user='******', 
                   channel=evt["channel"], 
                   text='Don\'t be a dick <@'+evt["user"]+'>')
    else:
      translator= Translator(to_lang=lang)
      l = translator.translate(langReq)
      sc.api_call('chat.postMessage', 
                   username='******', 
                   icon_emoji=flag,
                   as_user='******', 
                   channel=evt["channel"], 
                   text=l)
  except ValueError:
    sc.api_call('chat.postMessage', 
                 username='******', 
                 icon_url=flag,
                 as_user='******', 
                 channel=evt["channel"], 
                 text='Vhy try to anger botiana <@'+evt["user"]+'>?')
Example #4
	def twist(self, in_message):
		start_lang = 'en'
		last_lang = 'en'
		sentence =  in_message
		turns = 20

		first_step = ''

		for lang in range(len(lang_array)):
			if start_lang == lang_array[lang][1]:
				first_step += (lang_array[lang][0])

		steps = []
		for turn in range(turns):
			rand_int = r.randint(0,len(lang_array)-1)
			rand_lang = lang_array[rand_int][1]
			translator = Translator(to_lang = rand_lang, from_lang = last_lang)
			sentence = translator.translate(sentence)
			if sys.version_info.major == 2:
				sentence =sentence.encode(locale.getpreferredencoding())
			steps.append([sentence, rand_lang])
			print(str(turn + 1)+ '/' + str(turns) + ' (' + lang_array[rand_int][0] + ')')
			last_lang = rand_lang
		translator = Translator(to_lang = start_lang, from_lang = last_lang)
		sentence = translator.translate(sentence)
		sentence = sentence.capitalize()
		sentence = sentence.replace(' ,', ',')
		sentence = sentence.replace(' .', '.')
		sentence = sentence.replace(' \'', '\'')
		if sentence[len(sentence) - 1] != '.':
			sentence += '.'
		# print('\n' + steps)
		# print('\n' + sentence)
		return [steps, sentence]
Example #5
def main():
    arguements = docopt(
        data_io.set_up_doc(__doc__),
        version='0.1'
    )

    record = Record(debug=arguements['--debug'])
    if arguements['<data>']:
        # translation
        from_lang, to_lang, data = _extract(arguements)

        # translate data
        translator = Translator(from_lang, to_lang, data,
                                debug=arguements['--debug'])
        # result is a dictionary containing the decoded information of the
        # translation.
        result = translator.translate()
        translator.display_result(result)
        # add record
        record.add(from_lang, to_lang,
                   data, result)

    elif arguements['--record']:
        # display record
        record.display()
    else:
        raise Exception('Not Implemented Yet.')
Example #6
def ask_mapping(token_list):
    "Asks the user for a mapping"
    translation_name = input('Enter code of target language("hi" for hindi): ')
    translation_name = translation_name.upper().strip()
    translator = Translator(to_lang=translation_name)
    mapping = {}
    for token in token_list:
        internal = False
        if token[:2] + token[-2:] == '____':
            token = token[2:-2]
            internal = True
        try:
            translation = translator.translate(token)
        except:
            print('>'*10, 'Token failed to translate: |' + token + '|')
            continue
        translation = translation.replace(' ', '_').strip()
        if internal:
            token = '__' + token + '__'
            translation = '__' + translation + '__'
        mapped = {translation: token}
        print(mapped)
        mapping.update(mapped)
    f = open(translation_name, 'w')
    f.write(json.dumps(mapping,
                       ensure_ascii=False,
                       indent=4))
    f.close()
Example #7
def translate_excel():
    outfile = open("english_cv.md",'w')
    
    translator= Translator(from_lang="zh",to_lang="en")

    # sXXX for SOURCExxx
    sbook = open_workbook('source.xls',formatting_info= False )
    # tXXX means translatedXXX
    tbook = Workbook(encoding='utf-8')#write
    
    # read and write per sheet
    book_content = []
    for s in sbook.sheets():# s is a source sheet handle for read
        
        #add sheet
        try:
            utf_name = s.name.encode('utf8') # sheet names are unicode; the translator needs utf-8
            tsheet_name = translator.translate(utf_name) #translator.translate method only accepts utf-8
            print s.name,tsheet_name
            tsheet = tbook.add_sheet(tsheet_name) # write sheet in tbook, name in english
            print_title(tsheet_name,outfile)#write
        except:
            print "error in sheet:",s.name,"\n"
            print_title(s.name,outfile)
            
        #add content
        
        rows_content = []
        for row in range(s.nrows):
            print "row:",row
            col_content = []
            for col in range(s.ncols):
                try:
                    utf_cell = s.cell(row,col).value.encode('utf8')
                    tcell_value = translator.translate(utf_cell)
                    tsheet.write(row,col,tcell_value)
                    col_content.append(tcell_value)
                except: #the value might be float
                    # print "row:",row,"col:",col,s.cell(row,col).value
                    # tsheet.write(row,col,s.cell(row,col).value)
                    
                    nontexts = s.cell(row,col).value
                    col_content.append(str(nontexts))
            row_value = "\t".join(col_content)
            rows_content.append(row_value)
            tsheet.flush_row_data()
            
        try:
            sheet_txt = "\n".join(rows_content).encode('utf8')
        except:
            sheet_txt = "\n".join(rows_content)
        all_lines = invert_txt_table(sheet_txt)
        print_table(all_lines,outfile)

    outfile.close()
    
    print "saving..."
    tbook.save('english_output.xls')
    print "saved."
Example #8
def trans_to_english(sentence, language):
    """
    Translate a sentence to English.
    """
    translator= Translator(from_lang=language, to_lang="en")
    translation = translator.translate(sentence)
    print(translation)
    return translation
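
A quick usage sketch for the helper above; the sample sentence and language code are made up for illustration:

# Hypothetical call: translate a Spanish sentence to English.
print(trans_to_english("Hola, como estas?", "es"))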
Example #9
def translate(string, fromlang, tolang):
    translator = Translator(from_lang=fromlang, to_lang=tolang)
    print "Translating from " + fromlang + " to " + tolang
    try:
        return translator.translate(string).encode("UTF-8")
    except Exception as e:
        print "HTTP Error. Translation failed. Please run the script again."
        sys.exit()
Example #10
def index(request, source, target, text):
    trans = Translator(to_lang=target, from_lang=source)
    translation = trans.translate(text)
    # return HttpResponse(translation)

    return HttpResponse("""{ "data": { "translations": [ { "translatedText": "%s" } ] } }""" % translation)

# def index(request, text, source, target):
#     return HttpResponse("Hello, world. You're at the polls index.")
Example #11
def trans(request):
    if request.method == 'POST':
        inpu = request.POST.get('intext', '')
        translator= Translator(to_lang="fr")
        out = translator.translate(str(inpu))
        print out
        return HttpResponse(out)
    else:
        return render(request, 'personal/header.html')
Example #12
def __trans(flag, lang, message):
    try:
        if len(message) > MAX_TRANSLATE_LENGTH:
            resp = "Don't be a dick <@{}>".format(evt["user"])
            __send_response(resp, icon_ru)
        else:
            translator = Translator(to_lang=lang)
            l = translator.translate(message)
            __send_response(l, "emoji", flag)
    except ValueError:
        resp = 'Vhy try to anger {} <@{}>?'.format(BOT_NAME, evt["user"])
        __send_response(resp, icon_ru)
Example #13
def generate_release_names(num, names, translate=True, show_excluded=False):
    """Generate release names for Montréal Python edition

    num: amount of names to generate
    names: list of English names in format "Adjective Noun"
    translate: query google translate to provide French translation

    returns a tuple of two lists in format: (french names, english names)
    """
    en_names = []
    fr_names = []

    for en_name in sorted(names, key=len):
        if len(en_names) == num:
            break

        if not translate:
            en_names.append(en_name)
            continue

        translator = Translator(from_lang='en', to_lang='fr')
        fr_name = translator.translate(en_name).encode('utf8')

        # allow another run when the translated name
        # produces more than two words for our release name
        if len(fr_name.split(' ')) != 2:
            continue

        en_adj, en_noun = en_name.strip().split(' ')
        fr_adj, fr_noun = fr_name.strip().split(' ')

        # only keep release names for which one of the
        # translation's opposed word is a match.
        s_fr_adj = strip_accents(fr_adj.decode('utf8'))
        s_fr_noun = strip_accents(fr_noun.decode('utf8'))

        save = lambda l, adj, noun: l.append(' '.join([adj, noun]))
        if s_fr_adj == en_noun:
            # TODO: s_fr_adj is really french?
            save(en_names, en_adj, en_noun)
            save(fr_names, fr_adj, fr_noun)
            continue
        elif s_fr_noun == en_adj:
            # TODO: s_fr_noun is really french?
            save(en_names, en_adj, en_noun)
            save(fr_names, fr_adj, fr_noun)
            continue
        elif show_excluded:
            c = lambda c: ' '.join([w.capitalize() for w in c.split(' ')])
            print("excluded: %s (%s)" % (c(en_name), c(fr_name)))

    return fr_names, en_names
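
A usage sketch for generate_release_names; the candidate name list and count below are invented for illustration, and the call assumes the surrounding module's helpers (Translator, strip_accents) are importable:

candidate_names = ["Brave Badger", "Calm Cobra", "Quiet Quartz"]
fr_names, en_names = generate_release_names(2, candidate_names, translate=True)
print(fr_names, en_names)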
Example #14
def sms():
    text = request.form.get('Body', '')
    response = twiml.Response()
    text = text.strip()
    if text == 'HelpMe':
      response.sms("use 2 letter language code in this format for from and to ' from @to @message'")
    else: 
      text = text.split('@')  
      text[0] = text[0].strip()
      text[1] = text[1].strip()
      text[2] = text[2].strip()
      translator = Translator(to_lang=text[1], from_lang=text[0])
      translation = translator.translate(text[2])
      response.sms(translation)
    return str(response)
Example #15
def autotranslate(path, source_language, target_language,
                  ignore_already_translated=True):
    """Given a po file which is opened using polib and processed through
       Google Translator for all untranslated items by default
    """

    try:
        catalog = polib.pofile(path)
    except UnicodeDecodeError:
        raise Exception(("Encoding problem while parsing {0}, are you "
                         "sure that's a PO file?").format(path))

    translator = Translator(to_lang=target_language, from_lang=source_language)
    try:
        for idx, entry in enumerate(catalog):

            if ignore_already_translated and entry.translated():
                continue

            default_text = DEFAULT.match(entry.comment)
            if default_text:
                to_translate = default_text.group(1)
            else:
                to_translate = entry.msgid

            # Do we have to handle variables?
            variables = VAR_REGEX.search(to_translate)
            try:
                translated = translator.translate(to_translate.encode('utf-8'))
            except urllib2.HTTPError as e:
                log(ERROR, 'Error', u'{0:s} raised {1}: {2:s}'.format(
                    entry, e.__class__, e))
                continue

            if variables is not None:
                log(INFO, 'Found variable(s)', ', '.join(variables.groups()))

            log(SUCCESS, 'Success', u'{0} -> {1}'.format(
                to_translate, translated))

            # Save entry
            catalog[idx].msgstr = translated
            time.sleep(BREATHE)

    except KeyboardInterrupt:
        log(ERROR, 'Quit', '')

    catalog.save()
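
A minimal usage sketch for autotranslate; the PO file path and language codes below are hypothetical:

# Fill in missing German translations for an English catalog.
autotranslate('locales/de/LC_MESSAGES/messages.po',
              source_language='en', target_language='de')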
Example #16
class TestTranslator(TestCase):
    def setUp(self):
        self.trans = Translator()
        self.trans['a'] = 'x'
        self.trans['b'] = 'y'
        self.trans['c'] = 'z'

    def test_translate_known(self):
        self.assertEquals(self.trans('a'), 'x')
        self.assertEquals(self.trans['c'], 'z')
        self.assertEquals(self.trans['a'], 'x')

    def test_translate_unknown_but_in_alphabet(self):
        self.assertEquals(self.trans('h'), unknown_letter)

    def test_translate_unknown_but_not_in_alphabet(self):
        self.assertEquals(self.trans('G'), 'G')
        self.assertEquals(self.trans('$'), '$')

    def test_translate_full_string(self):
        self.assertEquals(self.trans('aG$zb_7k'), 'xG$_y_7_')

    def test_clone(self):
        cloned = self.trans.clone()
        self.trans['a'] = 'g'
        self.assertEquals(cloned('a'), 'x')
Example #17
    def __get_geocode(self, hospital):
        geolocator = GoogleV3(api_key=self.API_KEY)
        translator = Translator(from_lang='bg', to_lang='eng')

        name_query = self.__filter_only_letters(hospital.name)
        address_query = self.__reverse_abbreviations(self.__filter_only_letters(hospital.address))
        translated_address_query = translator.translate(address_query)

        # `translated address` followed by `address` leads to more accurate matches
        for query in [translated_address_query, address_query, name_query]:
            geocode_result = geolocator.geocode(query=query, exactly_one=True,
                                                timeout=self.API_TIMEOUT,
                                                sensor=False)
            if geocode_result is not None:
                break
        return geocode_result
Example #18
File: botnet.py  Project: jpichon/botnet
 def __init__(self):
     self.eliza = eliza()
     super(BotnetJabberClient,self).__init__(self.BOT_USER, self.BOT_PASS)
     self.geoPlanet = YahooGeoPlanetSearch()
     self.yahooWeather = YahooWeatherSearch()
     self.pyconIreland = PyconIreland()
     self.translator = Translator()
Example #19
class TestTranslator(unittest.TestCase):

    client_id = "<CLIENT_ID>"
    client_secret = "<CLIENT_SECRET>"

    def setUp(self):
        self.t = Translator(self.client_id, self.client_secret)

    def test_get_access_token(self):
        self.assertNotEqual(self.t.get_access_token(), "", "The access token is empty!")

    def test_translate(self):
        translated = self.t.translate("day", "en", "it")
        self.assertEqual(translated.lower(), "giorno", "The translated word is incorrect!")

        translated = self.t.translate("summer", "en", "ro")
        self.assertEqual(translated.lower(), "vara", "The translated word is incorrect!")
Example #20
    async def translate(self, language, *text):
        """Translates text from English to specified language

        **Use double quotes for each option**

        **Dependencies**: pip install translate
                          (https://github.com/terryyin/google-translate-python)

        Keyword arguments:
        language -- Two-letter code for the language to translate to
        text -- Text to translate.

        """
        text_to_string = ''.join(text)
        translator = Translator(to_lang=language)
        translation = translator.translate(text_to_string)

        await self.bot.say(translation)
Example #21
File: trans.py  Project: HANNATH/vsm
def transwrapper(text, from_lang, to_lang):
    
    if from_lang == 'en':
        lang = 'english'
    elif from_lang == 'fr':
        lang = 'french'
    elif from_lang == 'de':
        lang = 'german'
    sli = sent_tokenize(text, lang=lang)
    
    out = ''
    for sent in sli:
        sent = cleanup(sent) 
        
        ts = Ts(from_lang=from_lang, to_lang=to_lang)
        target = ts.translate(sent)
        out += target
    
    return out
Example #22
def main(asmfile="",from_lang="cz",to_lang="en"):
  if len(asmfile) > 0 and not isfile(asmfile):
    print "%s is not a file!" % asmfile
    exit(1)
  tl=Translator(from_lang=from_lang,to_lang=to_lang)
  #read from stdin or a file
  if len(asmfile) == 0:
    data=stdin.read()
  else:
    with open(asmfile,'r') as f:
      data=f.read()
  #try translating comments otherwise simply output the line
  for x in data.split('\n'):
    parts=x.split(';',1)
    if len(parts) > 1:
      parts[1]=tl.translate(parts[1])
      print ';'.join(parts)
    else:
      print x
Example #23
def main():
    config = Config('config.yaml')
    config.load() # Loads the configuration file.
    settings = config.get_config() # Stores the settings.

    core = Core(settings['proxy'], settings['server'], settings['listen'])
    core.connect() # Connects with the server over TOR.
    core.login(settings['account']['user'], # Login using nick and password.
               settings['account']['password'])

    # Joins to the channels.
    core.join(settings['transmission']['retransmit'])
    core.join(settings['transmission']['origin'])

    translator = Translator(from_lang=settings['translate']['from'],
                            to_lang=settings['translate']['to'])

    while True: # Main loop.
        output = core.read() # Reads data from the server.
        if core.ismessage(output): # If data is a message...
            # Get the nick of the owner.
            emitter = core.isemitter(output) if core.filter() \
            else output.split(' ')[0].split('!')[0][1:]

            # Get the name of the channel.
            channel = output.split(':')[1].split('#')[1][:-1]
            # If user and channel are allowed...
            if emitter and channel == settings['transmission']['origin']:
                # Builds a response.
                text = '@' + emitter + '#' + channel + ': ' + \
                       translator.translate(output.split(':')[2])
                # Sends the translated message to the desired channel.
                core.msg(settings['transmission']['retransmit'], text)
                print(text) # Shows the message.

        # If data is a PING...
        if output.find('PING') == 0: # Prevent timeout
            core.pong(output) # Sends a PONG.

        time.sleep(0.2) # Waits for 0.2 seconds.
Example #24
def ask_mapping(token_list):
    "Asks the user for a mapping"
    translation_name = input('Enter code of target language("in" for hindi): ')
    translation_name = translation_name.upper().strip()
    translator = Translator(to_lang=translation_name)
    mapping = {}
    for token in token_list:
        internal = False
        if token[:2] + token[-2:] == '____':
            token = token[2:-2]
            internal = True
        translation = translator.translate(token)
        translation = translation.replace(' ', '_').strip()
        if internal:
            token = '__' + token + '__'
            translation = '__' + translation + '__'
        mapped = {translation: token}
        print(mapped)
        mapping.update(mapped)
    f = open(translation_name, 'wb')
    pickle.dump(mapping, f)
    f.close()
Example #25
def translate_video_title(what):
	to_language = g.lang_code

	if to_language == "en":
		return what

	key = "{0}_{1}".format(what, to_language)

	translation = redis.get(key)

	if translation:
		return translation

	translator = Translator(to_lang=to_language)
	
	try:
		translation = translator.translate(what)
		redis.set(key, translation)
	except:
		translation = what

	return translation
Example #26
def decipher_file():
    """
    Main pipeline to decipher the file
    :return:
    """
    input_data = get_input_data()
    word_data = get_word_data()

    translate = Translator()
    solve.get_paircounts_translation_iteratively(
        translate, input_data, word_data)
    for iter in xrange(parameters['num_iterations_modify_letters']):
        solve.modify_each_letter(translate, input_data, word_data)

    logger.info('Final solution\n-------------------\n')
    true_translation = true_translation_dictionary()
    for k, v in translate.items():
        if k in input_data['ciphered_text']:
            logger.info("%s, %s, %s" % (k, v, (v == true_translation[k])))

    logger.debug("Finished decipher")
    return translate
Example #27
File: botiana.py  Project: rpkish/botiana
def __trans(flag, lang, message):
    try:
        if len(message) > MAX_TRANSLATE_LENGTH:
            resp = "Don't be a dick <@{}>".format(evt["user"])
            __send_response(resp, icon_ru)
        elif bot_mention in message:
            __send_response(msg_noop, icon_ru)
        else:
          if len(lang) > 2 and lang.find('|')!=-1:
              from_lang = lang.split("|")[0]
              to_lang = lang.split("|")[1]
              if len(from_lang) > 2 or len(to_lang) > 2:
                  __send_response(msg_noop, "emoji", ":flag-ru:")
              else:
                try:
                  translator = Translator(to_lang=to_lang, from_lang=from_lang)
                  if from_lang == "en":
                      flag = ":flag-us:"
                  else:
                      flag = ":flag-" + from_lang + ":"
                  l = translator.translate(message)
                  __send_response(l, "emoji", flag)
                except TypeError:
                  resp = 'hey <@{}>... {} don\'t speak that language.'.format(evt["user"],BOT_NAME)
                  __send_response(resp, icon_ru)
          elif len(lang) > 2:
              __send_response(msg_noop, "emoji", ":flag-ru:")
          else:
            try:
              translator = Translator(to_lang=lang)
              l = translator.translate(message)
              __send_response(l, "emoji", ":earth_americas:")
            except TypeError:
                angry()
    except ValueError:
        resp = 'Vhy try to anger {} <@{}>?'.format(BOT_NAME, evt["user"])
        __send_response(resp, icon_ru)
Example #28
    def setTargetCountry(self, targetcountrynames, language='es'):
        path = os.path.join(kmethods.__path__[0], 'new_country_db.json')
        translator = Translator(from_lang='en',to_lang='es')        
        
        hashtable = kgen.jload(path)
        
        targettable = dict()
        if(type(targetcountrynames)==list):            
            for n in targetcountrynames:
                try:
                    if(type(n)==unicode):
                        n = unicodedata.normalize('NFKD', n).encode('ascii','ignore').lower()
                    transn = translator.translate(n).lower()
                    targettable[n] = hashtable.pop(n)
                    if(hashtable.has_key(transn)):
                        targettable[transn] = hashtable.pop(transn)
                except:
                    print "name %s not found" % n
        elif(type(targetcountrynames)==str or type(targetcountrynames)==unicode):
            try:
                if(type(targetcountrynames)==unicode):
                    targetcountrynames = unicodedata.normalize('NFKD', targetcountrynames).encode('ascii','ignore').lower()
                transn = translator.translate(targetcountrynames).lower()
#                transn = u'm\u00e9xico'
                targettable[targetcountrynames] = hashtable.pop(targetcountrynames)
                if(hashtable.has_key(transn)):
                    targettable[transn] = hashtable.pop(transn)
            except:
                print "name %s not found" % targetcountrynames
        else:
            print "please input list or string"
            return
            
        allterms = [k for t in hashtable.values() for k in t.values()]
        self.NonTargetCountries = dict(zip(allterms, range(len(allterms))))
        alltargets = [k for t in targettable.values() for k in t.values()]
        self.TargetCountries = dict(zip(alltargets, range(len(alltargets))))
Example #29
File: main.py  Project: MemeTrash/DogeGen
class DogeGen(object):
    def __init__(self, resources):
        self.translator = Translator()
        self.resources = resources

    def make_meme(self, text, output_path, max_phrases):
        """
        Generates a Doge meme from the given text.

        Args:
            text (str): Text to translate into Dogespeak.
            output_path (str): Output path for Doge meme.
            max_phrases (int): Maximum number of phrases.
        """
        translated = self.translator.dogeify_text(text, 0.3, 0.2, max_phrases)
        draw_doge_meme(self.resources + IMAGE_PATH, output_path, self.resources + FONT_PATH, translated)
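
A usage sketch for DogeGen; the resource directory, text, and output path are placeholders:

gen = DogeGen('resources/')
gen.make_meme('I wrote a meme generator in Python', 'doge_out.png', max_phrases=3)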
Example #30
class Plugin(plugin.Plugin):
    def prepare(self):
        self.translator = Translator(self.conf.conf['bing_app_id'])

    def register_commands(self):
        self.commands = [
                ('party <<phrase>>', self.party)
                ]

    def party(self, message, args):
        """ A recreation of <a href="http://translationparty.com/">Translation Party</a> using the Bing translate API.
        $<comchar>party scissor me timbers
        >I have a tree.\x03# |\x03 \x027\x02 attempts\x03# |\x03 http://woof.bldm.us/party/<network>/Derasonika-120213-235608 """
        transvia = self.conf.conf['party_via']
    

        party = [args['phrase']]
        while dupes(party) == False:
            party.append(self.translator.translate('en', transvia, party[-1]))
            party.append(self.translator.translate(transvia, 'en', party[-1]))
        
        filename = '%s-%s' % (message.nick, time.strftime('%y%m%d-%H%M%S'))
        filepath = path.expanduser(self.conf.conf['party_dir']+self.conf.alias+'/')
        if not path.exists(filepath):
            mkdir(filepath)
        elif path.exists(filepath) and not path.isdir(filepath):
            raise OSError('\'party_dir\' is not a directory')
        filepath = filepath+filename+'.txt'

        print ' -- Writing to %s...' % filepath
        file = open(filepath, mode='w')
        sup = '\n'.join(party)
        file.write(sup)
        file.close()
        
        attempts = (len(party)-1)/2
        self.irc.privmsg(message.source, '%s | \x02%i\x02 attempts | %sparty/%s/%s/' % (party[-1], attempts, self.conf.conf['web_url'], self.conf.alias, filename), pretty=True)
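
The party command above keeps bouncing a phrase between English and a pivot language until a repeat shows up. A standalone sketch of the same idea, using the translate package's Translator seen in the other examples instead of the Bing client (the pivot language and phrase are arbitrary):

from translate import Translator

def translation_party(phrase, via='ja', max_rounds=10):
    # Round-trip the phrase en -> via -> en until it stops changing.
    to_via = Translator(from_lang='en', to_lang=via)
    to_en = Translator(from_lang=via, to_lang='en')
    history = [phrase]
    for _ in range(max_rounds):
        phrase = to_en.translate(to_via.translate(phrase))
        if phrase == history[-1]:
            break
        history.append(phrase)
    return history

print(translation_party('scissor me timbers'))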
Example #31
from translate import Translator
lang1 = input("Enter the language of text:")
lang2 = input("Enter the language ou want to convert the text in:")
translator = Translator(from_lang=lang1, to_lang=lang2)
stringInput = input("Enter the text you want to translate:")
translation = translator.translate(stringInput)
print(translation)
Example #32
from translate import Translator

location = 'write here the location of the file'

with open(location) as poem:
    translator = Translator(to_lang='ja')
    translation = translator.translate(poem.readline(50))
    print(translation)
Example #33
def fib3(number):  # third way in list which is more efficient
    a=0
    b=1
    result = []
    for i in range(number):
        result.append(a)
        temp = a
        a = b
        b = temp+b
    return result

print(fib3(20))

print('####### Eleventh one')
from translate import Translator
translator = Translator(to_lang='ja')
try:
    with open('translate.txt' , mode='r') as my_file:
        text = my_file.read()
        translation = translator.translate(text)
        print(text,translation)
        with open('translate-ja.txt', mode='w', encoding="utf-8") as my_file2:  # set encoding so the Japanese text can be written
            my_file2.write(translation)
except FileNotFoundError as err:
    print('check your file',err)


print('####### Twelfth one')
import re
password = '******'
# generate password with at least 8 characters which ends with number
Example #34
from translate import Translator

translator = Translator(to_lang='zh-TW')
translation = translator.translate('this is a pen!')
print(translation)

text1 = input('Sentences that you want to translate:')
trans1 = translator.translate(text1)
print(trans1)

try:
    with open('text.txt', mode='r') as my_file:
        text = my_file.read()
        trans = translator.translate(text)
        print(trans)
        with open('text-zh.txt', mode='w') as my_file1:
            text1 = my_file1.write(trans)
except FileNotFoundError as err:
    print('check your file path')
Example #35
from greetings import greetings
from translate import Translator

translator = Translator(to_lang='es')
for g in greetings:
    print(translator.translate(g).title())
Example #36
import requests
import collections
from translate import Translator
from word import search_for

translator = Translator(to_lang="vi")

ini_url = "https://api.onelook.com/words?v=ol_gte3&ml=%20{{ u }}&qe=ml&md=dp&max=1000&k=olthes_r4"
url = ini_url.replace("{{ u }}", search_for)
r = requests.get(url)
data = r.json()

tu_dong_nghia = []
for _ in range(0, 25):
    if data[_]['word'] is not None:
        tu_dong_nghia.append(data[_]['word'])

ds_tu_dong_nghia = []

from google.cloud import translate
translate_client = translate.Client()
target = 'vi'

for word in tu_dong_nghia:
    translation = translate_client.translate(word, target_language=target)
    if translation != word.capitalize():
        ds_tu_dong_nghia.append(translation['translatedText'].capitalize())
print(ds_tu_dong_nghia)

for word in tu_dong_nghia:
    translation = translator.translate(word)
Example #37
class TestOctaveTranslator(unittest.TestCase):
    def setUp(self):
        with open("targets/octave.json", "r") as targetFile:
            self.translator = Translator(json.load(targetFile))

    def test_translateProgram(self):
        """
        CSVFile trainf("train.dat")
        RealFeatures feats_train(trainf)
        CSVFile testf("test.dat")

        Translates to:
        trainf = CSVFile('train.dat');
        feats_train = RealFeatures(trainf);
        testf = CSVFile('test.dat')
        """
        programAST = [{
            "Statement": {
                "Init": [{
                    "ObjectType": "CSVFile"
                }, {
                    "Identifier": "trainf"
                }, {
                    "ArgumentList": {
                        "Expr": {
                            "StringLiteral": "train.dat"
                        }
                    }
                }]
            }
        }, {
            "Statement": {
                "Init": [{
                    "ObjectType": "RealFeatures"
                }, {
                    "Identifier": "feats_train"
                }, {
                    "ArgumentList": {
                        "Expr": {
                            "Identifier": "trainf"
                        }
                    }
                }]
            }
        }, {
            "Statement": {
                "Init": [{
                    "ObjectType": "CSVFile"
                }, {
                    "Identifier": "testf"
                }, {
                    "ArgumentList": {
                        "Expr": {
                            "StringLiteral": "test.dat"
                        }
                    }
                }]
            }
        }]

        translation = self.translator.translateProgram(programAST)

        self.assertEqual(
            translation,
            u"modshogun\n\ntrainf = CSVFile('train.dat');\nfeats_train = RealFeatures(trainf);\ntestf = CSVFile('test.dat');\n"
        )

    def test_translateProgramWithNewlines(self):
        programAST = [{
            "Statement": {
                "Init": [{
                    "ObjectType": "CSVFile"
                }, {
                    "Identifier": "trainf"
                }, {
                    "ArgumentList": {
                        "Expr": {
                            "StringLiteral": "train.dat"
                        }
                    }
                }]
            }
        }, {
            "Statement": "\n"
        }, {
            "Statement": {
                "Init": [{
                    "ObjectType": "RealFeatures"
                }, {
                    "Identifier": "feats_train"
                }, {
                    "ArgumentList": {
                        "Expr": {
                            "Identifier": "trainf"
                        }
                    }
                }]
            }
        }, {
            "Statement": "\n"
        }, {
            "Statement": {
                "Init": [{
                    "ObjectType": "CSVFile"
                }, {
                    "Identifier": "testf"
                }, {
                    "ArgumentList": {
                        "Expr": {
                            "StringLiteral": "test.dat"
                        }
                    }
                }]
            }
        }]

        translation = self.translator.translateProgram(programAST)

        self.assertEqual(
            translation,
            u"modshogun\n\ntrainf = CSVFile('train.dat');\n\nfeats_train = RealFeatures(trainf);\n\ntestf = CSVFile('test.dat');\n"
        )

    def test_translateInitCopy(self):
        initAST = [{
            "ObjectType": "IntMatrix"
        }, {
            "Identifier": "multiple_k"
        }, {
            "Expr": {
                "MethodCall": [{
                    "Identifier": "knn"
                }, {
                    "Identifier": "classify_for_multiple_k"
                }]
            }
        }]
        translation = self.translator.translateInit(initAST)
        self.assertEqual(translation,
                         u"multiple_k = knn.classify_for_multiple_k()")

    def test_translateInitConstruct(self):
        initAST = [{
            "ObjectType": "MulticlassLabels"
        }, {
            "Identifier": "labels"
        }, {
            "ArgumentList": {
                "Expr": {
                    "Identifier": "train_labels"
                }
            }
        }]
        translation = self.translator.translateInit(initAST)
        self.assertEqual(translation,
                         u"labels = MulticlassLabels(train_labels)")

    def test_translateInitConstructMultiple(self):
        initAST = [{
            "ObjectType": "EuclideanDistance"
        }, {
            "Identifier": "distance"
        }, {
            "ArgumentList": [{
                "Expr": {
                    "Identifier": "feats_train"
                }
            }, {
                "Expr": {
                    "Identifier": "feats_test"
                }
            }]
        }]
        translation = self.translator.translateInit(initAST)
        self.assertEqual(
            translation,
            u"distance = EuclideanDistance(feats_train, feats_test)")

    def test_translateStatementAssign(self):
        stmtAST = {
            "Assign": [{
                "Identifier": "knn_train"
            }, {
                "Expr": {
                    "BoolLiteral": "False"
                }
            }]
        }
        translation = self.translator.translateStatement(stmtAST)
        self.assertEqual(translation, u"knn_train = false;\n")

    def test_translateStatementExpr(self):
        stmtAST = {
            "Expr": {
                "MethodCall": [{
                    "Identifier": "knn"
                }, {
                    "Identifier": "train"
                }]
            }
        }

        translation = self.translator.translateStatement(stmtAST)
        self.assertEqual(translation, u"knn.train();\n")

    def test_translateStatementNewLine(self):
        stmtAST = "\n"
        translation = self.translator.translateStatement(stmtAST)
        self.assertEqual(translation, u"\n")

    def test_translateStatementPrint(self):
        stmtAST = {"Print": {"Expr": {"Identifier": "multiple_k"}}}

        translation = self.translator.translateStatement(stmtAST)

        self.assertEqual(translation, u"disp(multiple_k);\n")

    def test_translateType(self):
        typeAST = {"ObjectType": "IntMatrix"}
        translation = self.translator.translateType(typeAST)

        self.assertEqual(translation, u"IntMatrix")

    def test_translateExprEnum(self):
        enumAST = {
            "Enum": [{
                "Identifier": "LIBLINEAR_SOLVER_TYPE"
            }, {
                "Identifier": "L2R_L2LOSS_SVC_DUAL"
            }]
        }
        translation = self.translator.translateExpr(enumAST)

        self.assertEqual(translation, u"L2R_L2LOSS_SVC_DUAL")
        self.assertTrue(
            (u"LIBLINEAR_SOLVER_TYPE",
             u"L2R_L2LOSS_SVC_DUAL") in self.translator.dependencies["Enums"])

    def test_translateProgramComment(self):
        programAST = [{"Comment": " This is a comment"}]
        translation = self.translator.translateProgram(programAST)

        self.assertEqual(translation, u"modshogun\n\n% This is a comment\n")
Example #38
import os
from translate import Translator
import pandas as pd
from os.path import join

print("This script creates a csv table with translations \nfrom a list of items (words or sentences) separated by commas\n")

# https://pypi.org/project/translate/
cwd = os.getcwd()
files = os.listdir(cwd)

print("Files found in the directory:")
print(files, "\n")

tr = Translator(to_lang="es")

try:
	for f in files:
		if f.endswith(".txt"):
			print("\nTranslating items from file: '{}'".format(f))
			data = [word.split(',') for word in open(f, 'r').readlines()]
			data = [word.replace("\n", "") for word in data[0]]
			trans = [tr.translate(w) for w in data]
			dicto = dict(zip(data,trans))
			#print("Items to be translated:")
			#print(dicto)
			df = pd.DataFrame(dicto.items(), columns = ['English','Spanish'])
			print("\n", df)
			print("\nThe translated file can be found here: ", join(cwd, f.rsplit(".",1)[0]+".csv"))
			df.to_csv(join(cwd, f.rsplit(".",1)[0]+".csv"), index=False)
except Exception as err:
	print("Translation failed:", err)
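
A smaller sketch of the same idea on an in-memory word list instead of .txt files; the words and output file name below are made up:

from translate import Translator
import pandas as pd

tr = Translator(to_lang="es")
words = ["hello", "thank you", "good morning"]
df = pd.DataFrame({"English": words, "Spanish": [tr.translate(w) for w in words]})
df.to_csv("sample_translations.csv", index=False)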
Example #39
from translate import Translator
import argparse

args = argparse.ArgumentParser("python3 type_translate.py")

args.add_argument('-ne', '-nepali')
args.add_argument('-ja', '-japanese')
args.add_argument('-es', '-spanish')

options = args.parse_args()

translator = Translator(to_lang="ja")

text = input("Enter To Translate: ")

translation = translator.translate(text)
print(translation)
Example #40
 def __init__(self, dest_lang='cs'):
     self.translator = Translator(dest_lang)
     self.src = 'cs'
     self.dest = dest_lang
     self.translate = False
     self.commands_re = build_regex()
Example #41
File: html2json.py  Project: yeecai/regex
import re, sys, os, codecs, json
from translate import Translator

path = "your file path"

reload(sys)
sys.setdefaultencoding('utf-8')

fn = os.path.join(path + sys.argv[1])

regex = r'<\s*Button[^>]*>(.*?)<\s*/\s*Button>'
with open(fn, "r") as f:
    text = f.read()
    html_str = re.sub(r'<br>', ' ', text)
    relt = re.findall(regex, html_str)

list(set(relt))

data = {}

translator = Translator(from_lang="chinese", to_lang="english")

for x in range(len(relt)):
    translation = translator.translate(relt[x])
    data[relt[x]] = translation
    result = json.dumps(data, ensure_ascii=False, indent=2)

with codecs.open("result.json", "w", 'utf-8') as f:
    f.write(result)
Example #42
class Translate:
    def __init__(self, dest_lang='cs'):
        self.translator = Translator(dest_lang)
        self.src = 'cs'
        self.dest = dest_lang
        self.translate = False
        self.commands_re = build_regex()
        # self.translator = Translator(service_urls=['translate.google.cz'])    # googletrans

    @property
    def dest(self):
        return self.translator.to_lang

    @dest.setter
    def dest(self, value):
        self.translator = Translator(to_lang=value, from_lang=self.src)

    @property
    def src(self):
        return self.translator.from_lang

    @src.setter
    def src(self, value):
        self.translator = Translator(to_lang=self.dest, from_lang=value)

    def state(self):
        print(
            f'\tTranslate? {str(self.translate).upper()} | Src: {self.src} | Dest: {self.dest}'
        )

    def process_clipboard(self, text):

        if self.check_command(text) or not self.translate:
            self.state()
            return None
        else:
            self.state()

            # self.src = self.translator.detect(text).lang
            text = remove_special(text)
            text = remove_all_caps(text, self.src)
            print(f'\tGoing to translator: {text}')
            return self.translator.translate(text)

    def check_command(self, text):
        text = text.lower().strip()
        matches = self.commands_re.fullmatch(text)
        if matches is None:
            return False
        matches = matches.groupdict()
        matches['src'] = next(filter(None, [matches['src'], matches['src2']]),
                              False)
        matches['dest'] = next(
            filter(None, [matches['dest'], matches['dest2']]), False)
        print(matches)
        if matches['start']:
            self.translate = True
        if matches['stop']:
            self.translate = False
        if matches['dest']:
            self.dest = to_lang_code(matches['dest'])
        if matches['src']:
            self.src = to_lang_code(matches['src'])

        return True
Example #43
class Bot:
    """
    VK translator bot
    Python version 3.7
    """
    def __init__(self, GROUP_ID, TOKEN, to_lang='en', from_lang='ru'):
        """

        :param GROUP_ID: VK group ID
        :param TOKEN: secret token
        :param from_lang: source language, defaults to RU
        :param to_lang: target language, defaults to EN
        """
        self.group_id = GROUP_ID
        self.token = TOKEN
        self.vk = vk_api.VkApi(token=TOKEN)
        self.long_poller = VkBotLongPoll(self.vk, self.group_id)
        self.api = self.vk.get_api()
        self.translator = Translator(to_lang, from_lang)

    def run(self):
        """
        Start the bot
        """
        for event in self.long_poller.listen():
            try:
                self.on_event(event)
            except Exception:
                log.exception('Ошибка обработки')

    def on_event(self, event):
        """
        Handle an event: take the incoming message, translate it, and send the translation back
        :param event: VkBotMessageEvent
        :return: None
        """
        if event.type == VkBotEventType.MESSAGE_NEW:
            log.info('Обработка сообщения')
            try:
                if not event.object.text:
                    send_message = 'В сообщении отсутствует текст'
                elif len(event.object.text) > 500:
                    send_message = 'Максимальная длина текста для перевода: 500 знаков'
                else:
                    send_message = self.on_translate(event)
                self.api.messages.send(
                    message=send_message,
                    random_id=random.randint(0, 2**20),
                    peer_id=event.object.peer_id,
                )
            except Exception:
                log.exception('Ошибка сообщения')
        else:
            log.debug('Сообщения такого типа не обрабатываются %s', event.type)

    def on_translate(self, event):
        """
        Translate the message that was received
        :param event: VkBotMessageEvent
        :return: str, the translated text
        """
        try:
            translation = self.translator.translate(event.object.text)
        except Exception as err:
            translation = 'Перевод не удался'
        return translation
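
A startup sketch for the bot above; the group ID and token are placeholders:

# Hypothetical startup: translate Russian messages to English for group 123456.
bot = Bot(GROUP_ID=123456, TOKEN='your-secret-token', to_lang='en', from_lang='ru')
bot.run()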
Example #44
import pandas as pd
import xlrd
import sys
reload(sys)
sys.setdefaultencoding('utf8')

df = pd.read_excel('sentiment_analysis.xlsx')
df.head()

from translate import Translator

for index, row in df.iterrows():
    en_blob = df.iloc[index]['Frase']
    sentiment = df.iloc[index]['Sentimento']
    translator = Translator(to_lang="pt")
    pt_blob = translator.translate(en_blob)
    df.at[index, str('Frase')] = str(pt_blob)
    df.at[index, str('Sentimento')] = sentiment
df.head()

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB

vetorizador = CountVectorizer(binary='true')
X = vetorizador.fit_transform(df['Frase'])

y = df.Sentimento

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
Example #45
import json
import re
import autocomplete

from wit import Wit
from translate import Translator
from PyDictionary import PyDictionary
import requests

access_token = "AOOEJH2MHT3YNXUTWBWTB2KYRAYBRLHI"
client = Wit(access_token= access_token)

dictionary = PyDictionary()
translator = Translator(to_lang="th")

def wit_response(message_text):
    resp = client.message(message_text)
    entity = None
    value = None
    try:
        entity = list(resp['entities'])[0]
        value = resp['entities'][entity][0]['value']
    except Exception as e:
        pass
    return (entity,value)
def translate_response(message_text):
    try:
        meaning_JSON =  dictionary.meaning(message_text)
        meaning = json.loads(json.dumps(meaning_JSON))
        tran = translator.translate(message_text)
        if 'none' not in str(meaning).lower():
Example #46
    except:
        print("Butun kelimeler çıkartıldı")


def ignoreKelimeleriAl():
    ignor = str(open("ignore.txt").read()) + " "
    ignor = ignor + str(open("sayilar.txt").read()) + " "
    ignor = ignor + str(open("isimler.txt").read()) + " "
    return ignor


# definitions
kelimeler = []
ignore = []
count = []
translator = Translator(to_lang="tr")  # translator set up to translate into Turkish

kelimeler = diziyeAt(kirp(open("text.txt").read()))  # pull in the words
ignore = diziyeAt(kirp(ignoreKelimeleriAl()))

ilkHarfiBuyukYap()  # normalize spelling: capitalize the first letter of every word and lowercase the rest
mukerrer()  # remove duplicate words
mukerrerIgnere()  # same as mukerrer(), but for the ignore list
sirala()  # sort the list from the most frequent words to the least frequent
ignoreKelimeleriCikart()  # remove the words you marked as ignore from the "kelimeler" list

#for i in range (0,len(kelimeler)):
#    print(kelimeler[i])
Example #47
from gensim.models import KeyedVectors
from translate import Translator

trad = Translator(from_lang='fr', to_lang="en")

model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',
                                          binary=True)

print('Model build')

condSortie = 1

# while condSortie != 0:
# myInput = input("Entrez le mot à trouver : ")
#  = trad.translate(myInput)

clusterInge = [
    "engineer", "electrical_engineer", "mechanical_engineer", "engineering"
]

clusterCEO = [
    "Director", "CEO", "Executive_Director", "Vice_President", "director",
    "President"
]

clusterManagement = [
    "Manager", "General_Manager", "Coordinator", "manager", "Product_Manager"
]

myword = "engineer"
mywordvec = model[myword]
Example #48
File: main.py  Project: rekjef/discordbot
 async def translator(self, ctx, *args):
     translator = Translator(to_lang=args[0])
     translation = translator.translate(' '.join(args[1:]))
     await ctx.send(translation)
Example #49
def translateToSpanish(text):
    translator= Translator(to_lang="Spanish")
    translation = translator.translate(text)
    return translation
Example #50
 async def french_check(self, ctx, *, message):
     """Vérifie un message."""
     translator = Translator(to_lang="en", from_lang="fr")
     translation = translator.translate(message)
     await self.check(ctx, message=translation)
Example #51
from translate import Translator

try:
    with open('F:/Python/Practice/TestFolder/Name.txt', mode='r') as my_file:
        file_text = my_file.read()
        print(file_text)
        translator = Translator(to_lang='ja')
        translate_string = translator.translate(file_text)

        with open('F:/Python/Practice/TestFolder/Name-ja.txt', mode='w', encoding='utf-8') as my_file_translated:
            print(translate_string)
            my_file_translated.write(translate_string)


except FileNotFoundError as err:
    print('File Not Found')
    raise err
except IOError as err:
    print('Input/Output Error')
    raise err
Example #52
File: server.py  Project: nd1511/nematus
class NematusServer(object):
    """
    Keeps a Nematus model in memory to answer http translation requests.
    """

    STATUS_LOADING = 'loading'
    STATUS_OK = 'ok'

    def __init__(self, server_settings):
        """
        Loads a translation model and initialises the webserver.

        @param server_settings: see `settings.py`
        """
        self._style = server_settings.style
        self._host = server_settings.host
        self._port = server_settings.port
        self._threads = server_settings.threads
        self._debug = server_settings.verbose
        self._models = server_settings.models
        self._num_processes = server_settings.num_processes
        self._status = self.STATUS_LOADING
        # start webserver
        self._server = Bottle()
        self._server.config['logging.level'] = 'DEBUG' if server_settings.verbose else 'WARNING'
        self._server.config['logging.format'] = '%(levelname)s: %(message)s'
        self._server.install(LoggingPlugin(self._server.config))
        logging.info("Starting Nematus Server")
        # start translation workers
        logging.info("Loading translation models")
        self._translator = Translator(server_settings)
        self._status = self.STATUS_OK

    def status(self):
        """
        Reports on the status of this translation server.
        """
        response_data = {
            'status': self._status,
            'models': self._models,
            'version': pkg_resources.require("nematus")[0].version,
            'service': 'nematus',
        }
        response.content_type = "application/json"
        return json.dumps(response_data)

    def translate(self):
        """
        Processes a translation request.
        """
        translation_request = request_provider(self._style, request)
        logging.debug("REQUEST - " + repr(translation_request))

        translations = self._translator.translate(
            translation_request.segments,
            translation_request.settings
        )
        response_data = {
            'status': TranslationResponse.STATUS_OK,
            'segments': [translation.target_words for translation in translations],
        }
        translation_response = response_provider(self._style, **response_data)
        logging.debug("RESPONSE - " + repr(translation_response))

        response.content_type = translation_response.get_content_type()
        return repr(translation_response)

    def start(self):
        """
        Starts the webserver.
        """
        self._route()
        self._server.run(host=self._host, port=self._port, debug=self._debug, server='tornado', threads=self._threads)
        self._cleanup()

    def _cleanup(self):
        """
        Graceful exit for components.
        """
        self._translator.shutdown()

    def _route(self):
        """
        Routes webserver paths to functions.
        """
        self._server.route('/status', method="GET", callback=self.status)
        self._server.route('/translate', method="POST", callback=self.translate)
Example #53
 def __init__(self):
     self.translator = Translator(to_lang="en", from_lang='zh')
Example #54
 def setUp(self):
     with open("targets/octave.json", "r") as targetFile:
         self.translator = Translator(json.load(targetFile))
Example #55
    'kn': "Kannada"
}

st.markdown("<h1 style='text-align: center;'>Hailey :)</h1>",
            unsafe_allow_html=True)

ll = st.selectbox("In which language you want your audio file ?", [
    'English', 'Marathi', 'Hindi', 'Arabic', 'Bengali', 'Chinese', 'French',
    'German', 'Gujrati', 'Hebrew', 'Italian', 'Japnese', 'Kannada'
])

t = st.text_input("Enter your text here")

dest = list(dic.keys())[list(dic.values()).index(ll)]

translator = Translator(to_lang=dest)

translations = translator.translate(t)

if st.button("SUBMIT"):
    with st.spinner('Wait for it... '):
        time.sleep(3)
    try:
        st.success("Your translated text is " + str(translations))
        s = gtts.gTTS(text=translations, lang=dest, slow=False)
        s.save('result.mp3')
        a = open('result.mp3', 'rb')
        b = a.read()
        st.audio(b, format='audio/mp3')
    except ValueError:
        st.markdown(
Example #56
from translate import Translator

translator = Translator(from_lang="English", to_lang="Chinese")
translation = translator.translate("I am Chinese!")
print(translation)
Example #57
 def dest(self, value):
     self.translator = Translator(to_lang=value, from_lang=self.src)
Example #58
class Trainer(object):
    def __init__(self,
                 model_par,
                 train_data,
                 vocab_data,
                 optim,
                 lossCompute,
                 model,
                 valid_data=None,
                 tests_data=None):

        self.model_par, self.model, self.lossCompute, self.optim = model_par, model, lossCompute, optim
        self.sv, self.tv = vocab_data['src'].idx2key, vocab_data['trg'].idx2key
        self.train_data, self.valid_data, self.tests_data = train_data, valid_data, tests_data
        self.max_epochs, self.start_epoch = wargs.max_epochs, wargs.start_epoch

        self.n_look = wargs.n_look
        assert self.n_look <= wargs.batch_size, 'eyeball count > batch size'
        self.n_batches = len(train_data)  # [low, high)

        self.look_xs, self.look_ys = None, None
        if wargs.fix_looking is True:
            rand_idxs = random.sample(range(train_data.n_sent), self.n_look)
            wlog(
                'randomly look {} samples frow the whole training data'.format(
                    self.n_look))
            self.look_xs = [train_data.x_list[i][0] for i in rand_idxs]
            self.look_ys = [train_data.y_list_files[i][0] for i in rand_idxs]
        self.tor = Translator(model, self.sv, self.tv, gpu_ids=wargs.gpu_ids)
        self.n_eval = 1

        self.grad_accum_count = wargs.grad_accum_count

        self.epoch_shuffle_train = wargs.epoch_shuffle_train
        self.epoch_shuffle_batch = wargs.epoch_shuffle_batch
        self.ss_cur_prob = wargs.ss_prob_begin
        if wargs.ss_type is not None:
            wlog(
                'word-level optimizing bias between training and decoding ...')
            if wargs.bleu_sampling is True:
                wlog('sentence-level optimizing ...')
            wlog('schedule sampling value {}'.format(self.ss_cur_prob))
            if self.ss_cur_prob < 1. and wargs.bleu_sampling is True:
                self.sampler = Nbs(self.model,
                                   self.tv,
                                   k=3,
                                   noise=wargs.bleu_gumbel_noise,
                                   batch_sample=True)
        if self.grad_accum_count > 1:
            assert (
                wargs.chunk_size == 0
            ), 'to accumulate grads, disable target sequence truncating'

    def accum_matrics(self, batch_size, xtoks, ytoks, nll, ok_ytoks, bow_loss):

        self.look_sents += batch_size
        self.e_sents += batch_size
        self.look_nll += nll
        self.look_bow_loss += bow_loss
        self.look_ok_ytoks += ok_ytoks
        self.e_nll += nll
        self.e_ok_ytoks += ok_ytoks
        self.look_xtoks += xtoks
        self.look_ytoks += ytoks
        self.e_ytoks += ytoks

    def grad_accumulate(self, real_batches, e_idx, n_upds):

        #if self.grad_accum_count > 1:
        #    self.model_par.zero_grad()

        for batch in real_batches:

            # (batch_size, max_slen_batch)
            _, xs, y_for_files, bows, x_lens, xs_mask, y_mask_for_files, bows_mask = batch
            _batch_size = xs.size(0)
            ys, ys_mask = y_for_files[0], y_mask_for_files[0]
            #wlog('x: {}, x_mask: {}, y: {}, y_mask: {}'.format(
            #    xs.size(), xs_mask.size(), ys.size(), ys_mask.size()))
            if bows is not None:
                bows, bows_mask = bows[0], bows_mask[0]
                #wlog('bows: {}, bows_mask: {})'.format(bows.size(), bows_mask.size()))
            _xtoks = xs.data.ne(PAD).sum().item()
            assert _xtoks == x_lens.data.sum().item()
            _ytoks = ys[:, 1:].data.ne(PAD).sum().item()

            #if self.grad_accum_count == 1: self.model_par.zero_grad()
            # exclude last target word from inputs
            results = self.model_par(xs, ys[:, :-1], xs_mask, ys_mask[:, :-1],
                                     self.ss_cur_prob)
            logits, alphas, contexts = results['logit'], results[
                'attend'], results['context']
            # (batch_size, y_Lm1, out_size)

            gold, gold_mask = ys[:, 1:].contiguous(), ys_mask[:,
                                                              1:].contiguous()
            # 3. Compute loss in shards for memory efficiency.
            _nll, _ok_ytoks, _bow_loss = self.lossCompute(
                logits, e_idx, n_upds, gold, gold_mask, None, bows, bows_mask,
                contexts)

            self.accum_matrics(_batch_size, _xtoks, _ytoks, _nll, _ok_ytoks,
                               _bow_loss)
        # 4. Update the parameters and statistics.
        self.optim.step()
        self.optim.optimizer.zero_grad()
        #tc.cuda.empty_cache()

    def look_samples(self, n_steps):

        if n_steps % wargs.look_freq == 0:

            look_start = time.time()
            self.model_par.eval()  # affect the dropout !!!
            self.model.eval()
            if self.look_xs is not None and self.look_ys is not None:
                _xs, _ys = self.look_xs, self.look_ys
            else:
                rand_idxs = random.sample(range(self.train_data.n_sent),
                                          self.n_look)
                wlog('randomly look at {} samples from the whole training data'.
                     format(self.n_look))
                _xs = [self.train_data.x_list[i][0] for i in rand_idxs]
                _ys = [self.train_data.y_list_files[i][0] for i in rand_idxs]
            self.tor.trans_samples(_xs, _ys)
            wlog('')
            self.look_spend = time.time() - look_start
            self.model_par.train()
            self.model.train()

    def try_valid(self, e_idx, e_bidx, n_steps):

        if n_steps > 150000: wargs.eval_valid_freq = 1000
        if wargs.epoch_eval is not True and n_steps > wargs.eval_valid_from and \
           n_steps % wargs.eval_valid_freq == 0:
            eval_start = time.time()
            wlog('\nWithin epoch, e_batch:{}, n_steps:{}, {}-th validation ...'.
                 format(e_bidx, n_steps, self.n_eval))
            self.mt_eval(e_idx, e_bidx, n_steps)
            self.eval_spend = time.time() - eval_start

    def mt_eval(self, e_idx, e_bidx, n_steps):

        state_dict = {
            'model': self.model.state_dict(),
            'epoch': e_idx,
            'batch': e_bidx,
            'steps': n_steps,
            'optim': self.optim
        }

        if wargs.save_one_model:
            model_file = '{}.pt'.format(wargs.model_prefix)
        else:
            model_file = '{}_e{}_upd{}.pt'.format(wargs.model_prefix, e_idx,
                                                  n_steps)
        tc.save(state_dict, model_file)
        wlog('Saving temporary model in {}'.format(model_file))

        self.model_par.eval()
        self.model.eval()
        self.tor.trans_eval(self.valid_data, e_idx, e_bidx, n_steps,
                            model_file, self.tests_data)
        self.model_par.train()
        self.model.train()
        self.n_eval += 1

    def train(self):

        wlog('start training ... ')
        train_start = time.time()
        wlog('\n' + '#' * 120 + '\n' + '#' * 30 + ' Start Training ' +
             '#' * 30 + '\n' + '#' * 120)
        batch_oracles, _checks, accum_batches, real_batches = None, None, 0, []
        current_steps = self.optim.n_current_steps
        self.model_par.train()
        self.model.train()

        show_start = time.time()
        self.look_nll, self.look_ytoks, self.look_ok_ytoks, self.look_sents, self.look_bow_loss = 0, 0, 0, 0, 0
        for e_idx in range(self.start_epoch, self.max_epochs + 1):

            wlog('\n{} Epoch [{}/{}] {}'.format('$' * 30, e_idx,
                                                self.max_epochs, '$' * 30))
            if wargs.bow_loss is True:
                wlog('bow: {}'.format(schedule_bow_lambda(e_idx, 5, 0.5, 0.2)))
            # shuffle the training data for each epoch
            if self.epoch_shuffle_train: self.train_data.shuffle()
            self.e_nll, self.e_ytoks, self.e_ok_ytoks, self.e_sents = 0, 0, 0, 0
            self.look_xtoks, self.look_spend, b_counter, self.eval_spend = 0, 0, 0, 0
            epo_start = time.time()
            if self.epoch_shuffle_batch:
                shuffled_bidx = tc.randperm(self.n_batches)

            #for bidx in range(self.n_batches):
            bidx = 0
            cond = True if wargs.lr_update_way != 'invsqrt' else self.optim.learning_rate > wargs.min_lr
            while cond:
                if self.train_data.eos() is True: break
                if current_steps >= wargs.max_update:
                    wlog('Reached the max update {}'.format(wargs.max_update))
                    sys.exit(0)
                b_counter += 1
                e_bidx = shuffled_bidx[
                    bidx] if self.epoch_shuffle_batch else bidx
                if wargs.ss_type is not None and self.ss_cur_prob < 1. and wargs.bleu_sampling:
                    batch_beam_trgs = self.sampler.beam_search_trans(
                        xs, xs_mask, ys_mask)
                    batch_beam_trgs = [
                        list(zip(*b)[0]) for b in batch_beam_trgs
                    ]
                    #wlog(batch_beam_trgs)
                    batch_oracles = batch_search_oracle(
                        batch_beam_trgs, ys[1:], ys_mask[1:])
                    #wlog(batch_oracles)
                    batch_oracles = batch_oracles[:-1].cuda()
                    batch_oracles = self.model.decoder.trg_lookup_table(
                        batch_oracles)

                batch = self.train_data[e_bidx]
                real_batches.append(batch)
                accum_batches += 1
                if accum_batches == self.grad_accum_count:

                    self.grad_accumulate(real_batches, e_idx, current_steps)
                    current_steps = self.optim.n_current_steps
                    del real_batches
                    accum_batches, real_batches = 0, []
                    tc.cuda.empty_cache()
                    #grad_checker(self.model, _checks)
                    if current_steps % wargs.display_freq == 0:
                        #wlog('look_ok_ytoks:{}, look_nll:{}, look_ytoks:{}'.format(self.look_ok_ytoks, self.look_nll, self.look_ytoks))
                        ud = time.time(
                        ) - show_start - self.look_spend - self.eval_spend
                        wlog(
                            'Epo:{:>2}/{:>2} |[{:^5}/{} {:^5}] |acc:{:5.2f}% |{:4.2f}/{:4.2f}=nll:{:4.2f} |bow:{:4.2f}'
                            ' |w-ppl:{:4.2f} |x(y)/s:{:>4}({:>4})/{}={}({}) |x(y)/sec:{}({}) |lr:{:7.6f}'
                            ' |{:4.2f}s/{:4.2f}m'.format(
                                e_idx, self.max_epochs, b_counter,
                                len(self.train_data), current_steps,
                                (self.look_ok_ytoks / self.look_ytoks) * 100,
                                self.look_nll, self.look_ytoks,
                                self.look_nll / self.look_ytoks,
                                self.look_bow_loss / self.look_ytoks,
                                math.exp(self.look_nll / self.look_ytoks),
                                self.look_xtoks, self.look_ytoks,
                                self.look_sents,
                                int(round(self.look_xtoks / self.look_sents)),
                                int(round(self.look_ytoks / self.look_sents)),
                                int(round(self.look_xtoks / ud)),
                                int(round(self.look_ytoks / ud)),
                                self.optim.learning_rate, ud,
                                (time.time() - train_start) / 60.))
                        self.look_nll, self.look_xtoks, self.look_ytoks, self.look_ok_ytoks, self.look_sents, self.look_bow_loss = 0, 0, 0, 0, 0, 0
                        self.look_spend, self.eval_spend = 0, 0
                        show_start = time.time()

                    self.look_samples(current_steps)
                    self.try_valid(e_idx, e_bidx, current_steps)
                bidx += 1

            avg_epo_acc, avg_epo_nll = self.e_ok_ytoks / self.e_ytoks, self.e_nll / self.e_ytoks
            wlog('\nEnd epoch [{}]'.format(e_idx))
            wlog('avg. w-acc: {:4.2f}%, w-nll: {:4.2f}, w-ppl: {:4.2f}'.format(
                avg_epo_acc * 100, avg_epo_nll, math.exp(avg_epo_nll)))
            if wargs.epoch_eval is True:
                wlog(
                    '\nEnd of epoch, e_batch:{}, n_steps:{}, {}-th validation ...'
                    .format(e_bidx, current_steps, self.n_eval))
                self.mt_eval(e_idx, e_bidx, self.optim.n_current_steps)
            # decay the probability value epslion of scheduled sampling per batch
            if wargs.ss_type is not None:
                self.ss_cur_prob = ss_prob_decay(e_idx)  # start from 1.
            epo_time_consume = time.time() - epo_start
            wlog('Consuming: {:4.2f}s'.format(epo_time_consume))

        wlog('Finished training, consuming {:6.2f} hours'.format(
            (time.time() - train_start) / 3600))
        wlog('Congratulations!')
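
Trainer.grad_accumulate above follows the usual gradient-accumulation pattern: losses from several micro-batches are backpropagated before a single optimizer step, giving a larger effective batch without the extra memory. A minimal, self-contained PyTorch sketch of that pattern (the model, data, and loss scaling are illustrative, not taken from this codebase):

import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.CrossEntropyLoss()
grad_accum_count = 4  # number of micro-batches per parameter update

optimizer.zero_grad()
for step in range(40):
    x = torch.randn(8, 10)         # micro-batch of inputs
    y = torch.randint(0, 2, (8,))  # micro-batch of labels
    loss = loss_fn(model(x), y) / grad_accum_count  # scale so the sum matches one large batch
    loss.backward()                # gradients accumulate across micro-batches
    if (step + 1) % grad_accum_count == 0:
        optimizer.step()           # one update per grad_accum_count micro-batches
        optimizer.zero_grad()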
Example #59
from translate import Translator

spanish_translate = Translator(to_lang="es")
french_translate = Translator(to_lang="fr")

try:
    with open('quote.txt', mode='r') as quote_file:
        # read the file
        quote = quote_file.read()
        # do the translations
        quote_spanish = spanish_translate.translate(quote)
        quote_french = french_translate.translate(quote)
        # create the translated files
        try:
            with open('quote-es.txt', mode='w') as quote_es:
                quote_es.write(quote_spanish)
            with open('quote-fr.txt', mode='w') as quote_fr:
                quote_fr.write(quote_french)
        except IOError as error:
            print('An error occurred')
            raise error
except FileNotFoundError as error:
    print('File not found')
    raise error
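
One caveat with the snippet above: translated text is often non-ASCII, so it can be safer to pin the output encoding explicitly rather than relying on the platform default. A short variation under that assumption (the sample string is illustrative):

# Write translated (possibly non-ASCII) text with an explicit UTF-8 encoding.
quote_spanish = "La vida es bella."  # illustrative translated text
with open('quote-es.txt', mode='w', encoding='utf-8') as out_file:
    out_file.write(quote_spanish)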
Example #60
    def src(self, value):
        self.translator = Translator(to_lang=self.dest, from_lang=value)
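
The dest and src setter fragments above (Examples #57 and #60) look like pieces of a thin wrapper that rebuilds its Translator whenever either language changes. A minimal sketch of such a wrapper, with the class name and private attributes assumed rather than taken from the original code:

from translate import Translator


class LanguagePair:
    """Keep a Translator instance in sync with mutable src/dest language codes."""

    def __init__(self, src='en', dest='es'):
        self._src, self._dest = src, dest
        self.translator = Translator(to_lang=dest, from_lang=src)

    @property
    def src(self):
        return self._src

    @src.setter
    def src(self, value):
        # Rebuild the translator so later calls use the new source language.
        self._src = value
        self.translator = Translator(to_lang=self._dest, from_lang=value)

    @property
    def dest(self):
        return self._dest

    @dest.setter
    def dest(self, value):
        # Rebuild the translator so later calls use the new target language.
        self._dest = value
        self.translator = Translator(to_lang=value, from_lang=self._src)

    def translate(self, text):
        return self.translator.translate(text)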